Пример #1
0
def readSMAP(varLst, usgsIdLst):
    """Load the SMAP inversion inputs for a set of gages, de-normalized.

    One table per gage is read through ``readcsvGage``; every variable is
    then mapped back with the entries stored in ``statDictOri.json``.  The
    statistics of the de-normalized data are kept in ``statDictNew.json``,
    which is written only when it does not exist yet and is otherwise
    reused as found on disk.

    Args:
        varLst: names of the variables to read; also used as keys into both
            statistics files.
        usgsIdLst: gage identifiers, one per row of the returned array.

    Returns:
        Tuple ``(x, tSMAPLst, smapDict)``: the de-normalized array indexed
        as [gage, time, variable], the time array built from the hard-coded
        SMAP range, and the dictionary loaded from ``statDictNew.json``.
    """
    tSMAPLst = tRange2Array([20150402, 20180401])
    dataDir = "/scratch/feng/extractData/SMAPInv"
    ntime = len(tSMAPLst)

    x = np.empty([len(usgsIdLst), ntime, len(varLst)])
    for igage, gageId in enumerate(usgsIdLst):
        x[igage, :, :] = readcsvGage(dataDir, gageId, varLst, ntime)

    # Undo the stored normalization: value * entry[3] + entry[2].
    oriStatFile = os.path.join(dataDir, "statDictOri.json")
    with open(oriStatFile, "r") as fp:
        oriStat = json.load(fp)
    for ivar, var in enumerate(varLst):
        scale = oriStat[var][3]
        offset = oriStat[var][2]
        x[:, :, ivar] = x[:, :, ivar] * scale + offset

    # Statistics of the de-normalized data are computed once and cached.
    statnewFile = os.path.join(dataDir, "statDictNew.json")
    if not os.path.isfile(statnewFile):
        freshStat = {
            var: calStat(x[:, :, ivar])
            for ivar, var in enumerate(varLst)
        }
        with open(statnewFile, "w") as fp:
            json.dump(freshStat, fp, indent=4)

    # Always read back from disk so the caller sees exactly what is stored.
    with open(statnewFile, "r") as fp:
        smapDict = json.load(fp)

    return x, tSMAPLst, smapDict
Пример #2
0
 def getCSV(self,
            *,
            doNorm=True,
            rmNan=True,
            dataRange=[20150401, 20201002],
            readRange=[20150402, 20160402],
            csvdataDir="/scratch/feng/extractData/SMAP/csv/SMAPUpdate/",
            csvvarLst=["soil_moisture_pm"]):
     """Return CSV-backed variables for this object's gages over ``readRange``.

     Args:
         doNorm: when exactly ``True``, each variable is transformed as
             ``(value - stat[2]) / stat[3]`` using the statistics returned
             by ``readCSV``.
         rmNan: when exactly ``True``, NaN entries are replaced by 0.
         dataRange: time range handed to ``readCSV``.
         readRange: time range to keep; only entries also present in the
             time array returned by ``readCSV`` are retained.
         csvdataDir: directory handed to ``readCSV``.
         csvvarLst: names of the variables to read.

     Returns:
         Array indexed as [gage, time, variable].
     """
     allData, tcsvdataLst, csvstatDict = readCSV(
         dataDir=csvdataDir,
         dataRange=dataRange,
         varLst=csvvarLst,
         usgsIdLst=self.usgsId,
     )
     wantedTimes = tRange2Array(readRange)
     # Third output: positions in the stored time axis shared with readRange.
     _, _, keepIdx = np.intersect1d(wantedTimes,
                                    tcsvdataLst,
                                    return_indices=True)
     data = allData[:, keepIdx, :]

     if doNorm is True:
         for ivar, varName in enumerate(csvvarLst):
             stat = csvstatDict[varName]
             data[:, :, ivar] = (data[:, :, ivar] - stat[2]) / stat[3]

     if rmNan is True:
         data[np.isnan(data)] = 0

     return data
Пример #3
0
def readhour(varLst, usgsIdLst):
    """Read the per-gage NLDAS tables into one array.

    The number of time steps requested from ``readcsvGage`` is 24 times the
    length of the array that ``tRange2Array`` builds for the hard-coded
    range.

    Args:
        varLst: names of the variables to read.
        usgsIdLst: gage identifiers, one per row of the returned array.

    Returns:
        Tuple ``(x, thourLst)``: the data indexed as [gage, time, variable]
        and the array returned by ``tRange2Array`` (not multiplied by 24).
    """
    thourLst = tRange2Array([19851001, 20051001])
    dataDir = "/scratch/feng/extractData/NLDAS/csvLst/NLDAS"
    ntime = 24 * len(thourLst)

    x = np.empty([len(usgsIdLst), ntime, len(varLst)])
    for igage, gageId in enumerate(usgsIdLst):
        x[igage, :, :] = readcsvGage(dataDir, gageId, varLst, ntime)

    return x, thourLst
Пример #4
0
def readSAC(tRangeLst):
    """Load the saved SAC-SMA prediction restricted to ``tRangeLst``.

    Args:
        tRangeLst: array of time stamps to keep; only those also present in
            the hard-coded SAC period are returned.

    Returns:
        The selected columns of ``predSAC.npy`` with a trailing axis added,
        i.e. indexed as [gage, time, variable].
    """
    sacDir = pathCamels["Out"] + "/trend/SAC"
    tSACLst = tRange2Array([19801001, 20150101])

    # NOTE: allow_pickle=True executes pickled content; only load trusted files.
    predSAC = np.load(sacDir + "/predSAC.npy", allow_pickle=True)

    # Third output: positions along the stored time axis that fall in tRangeLst.
    _, _, keepIdx = np.intersect1d(tRangeLst, tSACLst, return_indices=True)
    return np.expand_dims(predSAC[:, keepIdx], axis=2)  # Ngage*Ntime*Nvar
Пример #5
0
def readLstm(tRangeLst):
    """Load the saved LSTM prediction restricted to ``tRangeLst``.

    ``pred.npy`` is averaged over its first axis with ``np.nanmean``
    (presumably the ensemble-member axis -- confirm against the writer)
    before the time selection is applied.

    Args:
        tRangeLst: array of time stamps to keep; only those also present in
            the hard-coded LSTM period are returned.

    Returns:
        Array indexed as [gage, time, variable].
    """
    tLstmRange = [19801001, 20150101]
    tLstmLst = tRange2Array(tLstmRange)
    lstmDir = "EnsemRun/DI_N/PNorm/SAC-LSTM/epochs300_batch100_rho365_hiddensize256_Tstart19801001_Tend19951001"
    periodTag = "{}_{}".format(tLstmRange[0], tLstmRange[1])
    outpathLstm = os.path.join(pathCamels["Out"], lstmDir, "All-90-95",
                               periodTag)

    # NOTE: allow_pickle=True executes pickled content; only load trusted files.
    stacked = np.load(outpathLstm + "/pred.npy", allow_pickle=True)
    meanPred = np.nanmean(stacked, axis=0)

    # Third output: positions along the stored time axis that fall in tRangeLst.
    _, _, keepIdx = np.intersect1d(tRangeLst, tLstmLst, return_indices=True)
    return meanPred[:, keepIdx, :]  # Ngage*Ntime*Nvar
Пример #6
0
 def getSMAP(self,
             *,
             doNorm=True,
             rmNan=True,
             SMAPinvrange=[20150402, 20160402]):
     """Return the SMAP inversion inputs for this object's gages.

     Args:
         doNorm: when exactly ``True``, each variable is transformed as
             ``(value - stat[2]) / stat[3]`` using the statistics returned
             by ``readSMAP``.
         rmNan: when exactly ``True``, NaN entries are replaced by 0.
         SMAPinvrange: time range to keep; only entries also present in the
             time array returned by ``readSMAP`` are retained.

     Returns:
         Array indexed as [gage, time, variable] with the variables ordered
         APCP, TMP, PEVAP, SMAP.
     """
     varsmapLst = ["APCP", "TMP", "PEVAP", "SMAP"]
     allData, tSMAPLst, smapDict = readSMAP(varLst=varsmapLst,
                                            usgsIdLst=self.usgsId)
     wantedTimes = tRange2Array(SMAPinvrange)
     # Third output: positions in the stored time axis shared with the request.
     _, _, keepIdx = np.intersect1d(wantedTimes, tSMAPLst,
                                    return_indices=True)
     data = allData[:, keepIdx, :]

     if doNorm is True:
         for ivar, varName in enumerate(varsmapLst):
             stat = smapDict[varName]
             data[:, :, ivar] = (data[:, :, ivar] - stat[2]) / stat[3]

     if rmNan is True:
         data[np.isnan(data)] = 0

     return data
Пример #7
0
 def __init__(self, *, subset="All", tRange):
     """Select the gages and the time span this object covers.

     Sets ``self.subset``, ``self.usgsId``, ``self.crd`` (one row per gage,
     columns latitude then longitude taken from ``gageDict``) and
     ``self.time``.

     Args:
         subset: ``"All"`` to use every gage in ``gageDict``, or a list of
             gage ids that must each be present in ``gageDict["id"]``.
         tRange: handed to ``tRange2Array`` to build ``self.time``.

     Raises:
         IndexError: a requested gage id is not found in ``gageDict["id"]``.
         Exception: ``subset`` is neither ``"All"`` nor a list.
     """
     self.subset = subset
     if subset == "All":  # change to read subset later
         self.usgsId = gageDict["id"]
         lat = gageDict["lat"]
         lon = gageDict["lon"]
     elif isinstance(subset, list):
         self.usgsId = np.array(subset)
         # Allocate the index array as int from the start; casting a
         # NaN-filled float array to int is undefined and warns on recent
         # NumPy versions.
         ind = np.empty(len(self.usgsId), dtype=int)
         for ii, gageId in enumerate(self.usgsId):
             matches = np.where(gageDict["id"] == gageId)[0]
             if matches.size == 0:
                 # Same exception type as the bare [0][0] lookup used to
                 # raise, but the message names the offending id.
                 raise IndexError(
                     "Gage id {} was not found in gageDict['id']".format(
                         gageId))
             ind[ii] = matches[0]
         lat = gageDict["lat"][ind]
         lon = gageDict["lon"][ind]
     else:
         raise Exception("The format of subset is not correct!")

     crd = np.zeros([len(self.usgsId), 2])
     crd[:, 0] = lat
     crd[:, 1] = lon
     self.crd = crd
     self.time = tRange2Array(tRange)
Пример #8
0
def readCSV(dataDir, dataRange, varLst, usgsIdLst):
    """Read per-gage CSV data and the matching normalization statistics.

    Values less than or equal to -999 are treated as missing and replaced
    by NaN.  Statistics are kept in ``statDictCSV.json`` inside
    ``dataDir``; the file is written only when it does not exist yet and
    is otherwise reused as found on disk.

    Args:
        dataDir: directory holding the gage files and the statistics file.
        dataRange: time range handed to ``tRange2Array``.
        varLst: names of the variables to read; also the statistics keys.
        usgsIdLst: gage identifiers, one per row of the returned array.

    Returns:
        Tuple ``(x, tdataRangeLst, statcsvDict)``: data indexed as
        [gage, time, variable], the time array, and the dictionary loaded
        from ``statDictCSV.json``.
    """
    tdataRangeLst = tRange2Array(dataRange)
    ntime = len(tdataRangeLst)

    x = np.empty([len(usgsIdLst), ntime, len(varLst)])
    for igage, gageId in enumerate(usgsIdLst):
        x[igage, :, :] = readcsvGage(dataDir, gageId, varLst, ntime)

    # Missing-value sentinels (e.g. -9999) become NaN.
    x[x <= -999] = np.nan

    # Compute the normalization statistics once and cache them on disk.
    statnewFile = os.path.join(dataDir, "statDictCSV.json")
    if not os.path.isfile(statnewFile):
        freshStat = {
            var: calStat(x[:, :, ivar])
            for ivar, var in enumerate(varLst)
        }
        with open(statnewFile, "w") as fp:
            json.dump(freshStat, fp, indent=4)

    # Always read back from disk so the caller sees exactly what is stored.
    with open(statnewFile, "r") as fp:
        statcsvDict = json.load(fp)

    return x, tdataRangeLst, statcsvDict
Пример #9
0
            predLst_res = list()
            obsLst_res = list()
            statDictLst = []
            for i, out in enumerate(outLst):
                #df, pred, obs = master.test(out, TempTarget, forcing_path[i], attr_path[i], tRange=tRange, subset=subset, basinnorm=True, epoch=TestEPOCH, reTest=True)
                df, pred, obs, x = master.test(out, TempTarget, forcing_path, attr_path, D_N_P_path, tRange=tRange, subset=subset, basinnorm=False, epoch=TestEPOCH, reTest=True)

                # change the units ft3/s to m3/s
                #obs = obs * 0.0283168
                #pred = pred * 0.0283168

                # Set the observations to NaN outside each basin's testing
                # period, as listed in the D_N_P file.
                D_N_P = pd.read_excel(D_N_P_path)
                # The evaluation window does not depend on the basin, so its
                # date array is built once per model instead of once per row.
                tArray2 = time.tRange2Array(tRange)
                for ii in range(obs.shape[0]):
                    row = D_N_P.iloc[ii]
                    # Encode the start/end timestamps as YYYYMMDD integers.
                    testRange = [
                        int(ts.year * 10000 + ts.month * 100 + ts.day)
                        for ts in (row['S_Testing'], row['E_Testing'])
                    ]
                    tArray1 = time.tRange2Array(testRange)
                    # np.intersect1d returns integer positions, and applying
                    # ~ to integers yields negative indices (-k-1) rather
                    # than the complement.  A boolean mask selects exactly
                    # the time steps that are NOT in the testing period.
                    # Like the original indexing, this assumes axis 1 of obs
                    # is aligned with tArray2.
                    inTesting = np.isin(tArray2, tArray1)
                    obs[ii, ~inTesting, :] = np.nan

                #####################################################################

                predLst.append(pred) # the prediction list for all the models
                obsLst.append(obs)
                np.save(os.path.join(out, 'pred.npy'), pred)
                np.save(os.path.join(out, 'obs.npy'), obs)
                f = np.load(os.path.join(out, 'x.npy'))  # it has been saved previously in the out directory (forcings)
                T = (f[:, :, 3] + f[:, :, 4]) / 2    # mean air T for T_residual
Пример #10
0
# read camels dataset
import os
import pandas as pd
import numpy as np
import datetime as dt
from hydroDL import utils, pathCamels
from hydroDL.utils.time import tRange2Array
from pandas.api.types import is_numeric_dtype, is_string_dtype
import time
import json
from hydroDL.dataset import Dataframe

# Module-level constants, evaluated once when the module is imported.
# Each range is a two-element list; the values look like [start, end] dates
# encoded as YYYYMMDD integers -- confirm against tRange2Array.
tRange = [19800101, 20150101]
tRangeobs = [19790101, 20150101]  # streamflow observations
# Arrays built by tRange2Array (imported from hydroDL.utils.time) for each
# of the two ranges above.
tLst = tRange2Array(tRange)
tLstobs = tRange2Array(tRangeobs)
# Lengths of the two arrays above.
nt = len(tLst)
ntobs = len(tLstobs)
# Forcing variable names in use; the commented-out variant below also
# listed 'swe'.
# forcingLst = ['dayl', 'prcp', 'srad', 'swe', 'tmax', 'tmin', 'vp']
forcingLst = ["dayl", "prcp", "srad", "tmax", "tmin", "vp"]
attrLstSel = [
    "elev_mean",
    "slope_mean",
    "area_gages2",
    "frac_forest",
    "lai_max",
    "lai_diff",
    "dom_land_cover_frac",
    "dom_land_cover",
    "root_depth_50",