Пример #1
0
# Load the three per-site count tables; 'siteNo' is read as a string and used
# as the index of each table.
df0, df1, df2 = [
    pd.read_csv(os.path.join(dirInv, fileName),
                dtype={'siteNo': str}).set_index('siteNo')
    for fileName in ('codeCount.csv',
                     'codeCount_B2000.csv',
                     'codeCount_A2000.csv')
]
code = '00955'

# Sites with more than 100 silica samples in both the training and the testing
# table (dataset is named Silica64).
enough1 = df1[code] > 100
enough2 = df2[code] > 100
siteNoLst = df0[enough1 & enough2].index.tolist()

# Build the dataset the first time only, then always load it by name.
if not waterQuality.exist('Silica64'):
    wqData = waterQuality.DataModelWQ.new('Silica64', siteNoLst)
wqData = waterQuality.DataModelWQ('Silica64')

# Year-range indices over the full dataset (saving them is disabled).
indYr1 = waterQuality.indYr(wqData.info, yrLst=[1979, 2000])[0]
# wqData.saveSubset('Y8090', indYr1)
indYr2 = waterQuality.indYr(wqData.info, yrLst=[2000, 2020])[0]
# wqData.saveSubset('Y0010', indYr2)

# Subset of samples that actually have a silica value (non-NaN in column `ic`).
ic = wqData.varC.index(code)
hasCode = ~np.isnan(wqData.c[:, ic])
indC = np.where(hasCode)[0]
wqData.saveSubset(code, indC)

# Same year ranges, restricted to the silica-only rows; these overwrite the
# indices computed above (saving them is disabled).
indYr1 = waterQuality.indYr(wqData.info.iloc[indC], yrLst=[1979, 2000])[0]
# wqData.saveSubset('{}-Y8090'.format(code), indYr1)
indYr2 = waterQuality.indYr(wqData.info.iloc[indC], yrLst=[2000, 2020])[0]
# wqData.saveSubset('{}-Y0010'.format(code), indYr2)
Пример #2
0
import torch
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from hydroDL.model import rnn, crit, trainTS
import time

# NOTE(review): `waterQuality` and `gageII` are used below but are not among
# the imports visible in this fragment — confirm they are imported elsewhere.

# Single-site experiment settings.
siteNo = '401733105392404'
codeLst = ['00915', '00940', '00955']
# codeLst = ['00915', '00955']
nh = 256
batchSize = [365, 50]

# Build the one-site dataset the first time only, then always load it by name.
if not waterQuality.exist(siteNo):
    wqData = waterQuality.DataModelWQ.new(siteNo, [siteNo])
wqData = waterQuality.DataModelWQ(siteNo)

# Variable groups handed to transIn: (x, xc, y, yc).
varX, varXC = wqData.varF, wqData.varG
varY = [wqData.varQ[0]]
varYC = codeLst
varTup = (varX, varXC, varY, varYC)

# Transform the data, run the NaN handling with flags [1, 1, 0, 0], then
# unpack the six sizes reported by getSize.
dataTup, statTup = wqData.transIn(varTup=varTup)
dataTup = trainTS.dealNaN(dataTup, [1, 1, 0, 0])
nx, nxc, ny, nyc, nt, ns = sizeLst = trainTS.getSize(dataTup)

# Attribute table (passed through updateCode) and the site's x series.
tabG = gageII.updateCode(
    gageII.readData(varLst=varXC, siteNoLst=[siteNo]))
dfX = waterQuality.readSiteX(siteNo, varX, nFill=5)
Пример #3
0
                  dtype={'siteNo': str}).set_index('siteNo')
# Count table read from codeCount_A2000.csv, indexed by site number (as str).
df2 = pd.read_csv(os.path.join(dirInv, 'codeCount_A2000.csv'),
                  dtype={'siteNo': str}).set_index('siteNo')

# pick some sites: for each code, the sites with more than 100 samples in
# both df1 and df2; then keep only the sites common to every code.
codeLst = ['00300', '00915']
tempLst = list()
for code in codeLst:
    temp = df0[(df1[code] > 100) & (df2[code] > 100)].index.tolist()
    # temp = df0[df0[code] > 200].index.tolist()
    tempLst.append(temp)
siteNoLst = tempLst[0]
for k in range(1, len(tempLst)):
    siteNoLst = list(set(siteNoLst).intersection(tempLst[k]))

# Create the dataset on the first run, otherwise load the existing one.
# Without the else branch `wqData` was left unbound whenever 'CaO49' already
# existed, so every later run failed on the next statement.
if not waterQuality.exist('CaO49'):
    wqData = waterQuality.DataModelWQ.new('CaO49', siteNoLst)
else:
    wqData = waterQuality.DataModelWQ('CaO49')
indYr1 = waterQuality.indYr(wqData.info, yrLst=[1979, 2000])[0]
wqData.saveSubset('Y8090', indYr1)
indYr2 = waterQuality.indYr(wqData.info, yrLst=[2000, 2020])[0]
wqData.saveSubset('Y0010', indYr2)


# subset only have Ca and O
ic = [wqData.varC.index(code) for code in codeLst]
# NOTE(review): if wqData.c is a 2-D array, indexing with the list `ic` gives
# a 2-D mask, and np.where(...)[0] then lists a row once per non-NaN column
# (a row with both codes appears twice) — confirm this is intended rather
# than an any/all reduction over axis 1.
indC = np.where(~np.isnan(wqData.c[:, ic]))[0]
wqData.saveSubset('CaO', indC)
# Year-range indices computed on the rows selected above.
indYr1 = waterQuality.indYr(wqData.info.iloc[indC], yrLst=[1979, 2000])[0]
wqData.saveSubset('CaO-Y8090', indYr1)
indYr2 = waterQuality.indYr(wqData.info.iloc[indC], yrLst=[2000, 2020])[0]
wqData.saveSubset('CaO-Y0010', indYr2)
Пример #4
0
from hydroDL.app import waterQuality
from hydroDL.data import gageII
from hydroDL.master import basins

import pandas as pd
import numpy as np
import os
import time

# all gages: site numbers listed in the 'siteNoLst-1979' inventory file
# (no header row, everything read as str).
dirInventory = os.path.join(kPath.dirData, 'USGS', 'inventory')
fileSiteNo = os.path.join(dirInventory, 'siteNoLst-1979')
siteNoLstAll = pd.read_csv(fileSiteNo, header=None, dtype=str)[0].tolist()

# Sites from HBN.csv that also appear in the full list, in HBN.csv order.
# The full list is turned into a set once so each membership test is a hash
# lookup instead of a scan of the whole list.
dfHBN = pd.read_csv(os.path.join(dirInventory, 'HBN.csv'),
                    dtype={'siteNo': str}).set_index('siteNo')
siteNoSetAll = set(siteNoLstAll)
siteNoHBN = [siteNo for siteNo in dfHBN.index.tolist()
             if siteNo in siteNoSetAll]

# wrap up data: load the dataset if it exists; otherwise create it (rho=30)
# and save the 'first80' / 'last20' subsets from indByRatio.
caseName = 'HBN-30d'
if waterQuality.exist(caseName):
    wqData = waterQuality.DataModelWQ(caseName)
else:
    wqData = waterQuality.DataModelWQ.new(caseName, siteNoHBN, rho=30)
    ind1 = wqData.indByRatio(0.8)
    ind2 = wqData.indByRatio(0.2, first=False)
    wqData.saveSubset(['first80', 'last20'], [ind1, ind2])

# NOTE(review): training is launched on 'HBN', not on caseName ('HBN-30d')
# prepared above — confirm this is intentional.
basins.trainModelTS('HBN', 'first80', batchSize=[None, 500], saveName='HBN_opt1', optQ=1)

a, b = [200, None]
Пример #5
0
# Select reference basins: sites whose CLASS value equals 1 in the table
# returned by gageII.updateCode.
tabSel = gageII.updateCode(
    gageII.readData(varLst=['CLASS'], siteNoLst=siteNoLstAll))
isRef = tabSel['CLASS'] == 1
siteNoRef = tabSel[isRef].index.tolist()

# Shapefiles: the HBN extraction is disabled, so only the reference-basin
# shapefile is written and outShapeFile ends up pointing at it.
usgsDir = os.path.join(kPath.dirData, 'USGS')
dirBasins = os.path.join(usgsDir, 'basins')
outShapeFile = os.path.join(dirBasins, 'HBN.shp')
# gageII.extractBasins(siteNoHBN, outShapeFile)
outShapeFile = os.path.join(dirBasins, 'basinRef.shp')
gageII.extractBasins(siteNoRef, outShapeFile)

# Wrap up data: create each dataset that does not exist yet
# (name, site list, extra keyword arguments for DataModelWQ.new).
for _name, _sites, _opts in (
        ('HBN', siteNoHBN, {}),
        ('HBN-30d', siteNoHBN, {'rho': 30}),
        ('HBN-5s', siteNoHBN[:5], {}),
        ('HBN-5s-30d', siteNoHBN[:5], {'rho': 30}),
):
    if not waterQuality.exist(_name):
        wqData = waterQuality.DataModelWQ.new(_name, _sites, **_opts)

# Continue with the 'HBN' dataset: load it when present, otherwise create it.
if waterQuality.exist('HBN'):
    wqData = waterQuality.DataModelWQ('HBN')
else:
    wqData = waterQuality.DataModelWQ.new('HBN', siteNoHBN)
if 'first80-rm2' not in wqData.subset.keys():
    ind = wqData.subset['first80']