Пример #1
0
varC = usgs.newC
# selected sites with obs more than xxx:
# sum along axis 1 of matC masked by the complement of matCF
# (presumably matCF is a boolean flag array - TODO confirm).
countMat = np.sum(matC * ~matCF, axis=1)

tempC = [
    '00300', '00400', '00405', '00600', '00605', '00618', '00660', '00665',
    '00681', '00915', '00925', '00930', '00935', '00940', '00945', '00955',
    '71846', '80154'
]

# Pairwise co-occurrence ratios between the codes of codeLst, where a / b are
# the matC slices (last axis) of code j / code i:
#   mat1[j, i] = sum(a * b) / sum(a) over all rows,
#   mat2[j, i] = the same ratio restricted to the rows whose sum of a along
#                axis 1 exceeds the threshold `the`.
nc = len(codeLst)
mat1 = np.zeros([nc, nc])
mat2 = np.zeros([nc, nc])
the = 200
for j, c1 in enumerate(codeLst):
    # The enumerate position replaces codeLst.index(c1); both agree as long
    # as the codes in codeLst are unique.
    a = matC[:, :, j]
    # Everything below depends only on code j, so it is computed once per
    # outer iteration instead of once per (j, i) pair.
    ix = np.sum(a, axis=1) > the
    aThe = a[ix, :]
    sumA = np.sum(a)
    sumAThe = np.sum(aThe)
    for i, c2 in enumerate(codeLst):
        print(j, i)
        b = matC[:, :, i]
        mat1[j, i] = np.sum(a * b) / sumA
        mat2[j, i] = np.sum(aThe * b[ix, :]) / sumAThe

# heat map of the ratios over all rows, in percent
fig, ax = plt.subplots(1, 1)
axplot.plotHeatMap(ax, mat1 * 100, labLst=codeLst)
fig.show()

# heat map of the ratios over the thresholded rows, in percent
fig, ax = plt.subplots(1, 1)
axplot.plotHeatMap(ax, mat2 * 100, labLst=codeLst)
fig.show()
Пример #2
0
# codeSel = ['00405','00600', '00605', '00618', '00660', '00665','71846']
codeSel = usgs.newC
# position of every selected code along the last axis of matB
indC = [codeLst.index(code) for code in codeSel]
mat = matB[:, :, indC]
the = 0
# per row of matB: number of entries along axis 1 where any selected code is
# set; keep the rows whose number exceeds the threshold `the`
count = np.sum(np.any(mat, axis=2), axis=1)
indS = np.where(count > the)[0]
nc = len(codeSel)
out = np.empty([nc, nc])
for j, codej in enumerate(codeSel):
    cj = indC[j]
    # code j is the reference of the whole row j of `out`
    a = matB[indS, :, cj]
    sumA = np.sum(a)
    for i, codei in enumerate(codeSel):
        ci = indC[i]
        if i != j:
            # off-diagonal: share of code-j entries where code i is set too
            b = matB[indS, :, ci]
            out[j, i] = np.sum(a & b)/sumA
            continue
        # diagonal: one minus the share of code-j entries where at least one
        # other column of matB (any column except cj) is set as well
        b1 = np.any(matB[indS, :, :cj], axis=2)
        b2 = np.any(matB[indS, :, cj+1:], axis=2)
        b = b1 | b2
        out[j, i] = 1-np.sum(a & b)/sumA

# axis labels: "<shortName> <code>"
labelLst = [
    '{} {}'.format(usgs.codePdf.loc[code]['shortName'], code)
    for code in codeSel
]
fig, ax = plt.subplots(1, 1)
axplot.plotHeatMap(ax, out*100, labLst=labelLst)
fig.show()
Пример #3
0
if 'calCount' in doLst:
    # Find two variables (hopefully one rock, one bio) that are most related:
    # for every count table, compute the Spearman rank correlation between
    # each pair of code columns and draw it as a heat map (in percent).
    fileLst = ['codeCount.csv', 'codeCount_B2000.csv', 'codeCount_A2000.csv']
    df0, df1, df2 = [
        pd.read_csv(os.path.join(dirInv, fileName),
                    dtype={'siteNo': str}, index_col='siteNo')
        for fileName in fileLst]
    nc = len(codeLst)
    dfLst = [df0, df1, df2]
    # Titles mirror the file suffixes; the third table is read from
    # codeCount_A2000.csv, so it is labelled 'A2000'.
    titleLst = ['all', 'B2000', 'A2000']
    # Axis labels ("<shortName> <code>") do not depend on the table, so they
    # are built once.
    varNameLst = ['{} {}'.format(
        usgs.codePdf.loc[code]['shortName'], code) for code in codeLst]
    for df, title in zip(dfLst, titleLst):
        # NaN-filled so that any entry left unset would be visible
        matCorr = np.full([nc, nc], np.nan)
        for j, c1 in enumerate(codeLst):
            v1 = df[c1].values
            for i, c2 in enumerate(codeLst):
                v2 = df[c2].values
                # Alternatives tried earlier: restricting to rows where both
                # counts are non-zero, or scipy.stats.pearsonr.
                corr, p = scipy.stats.spearmanr(v1, v2)
                matCorr[j, i] = corr
        fig, ax = plt.subplots()
        axplot.plotHeatMap(ax, matCorr*100, varNameLst)
        ax.set_title('spearman correlation of {}'.format(title))
        fig.tight_layout()
        fig.show()
Пример #4
0
# yOut against d2.y at index 0 of axis 1, one figure for each of the first
# two entries of the last axis
for iVar in (0, 1):
    fig, ax = plt.subplots(1, 1)
    axplot.plotTS(ax, d2.t, [yOut[:, 0, iVar], d2.y[:, 0, iVar]])
    fig.show()

k = 0
# dataPlot = [yP[:, k, :], d1.Y[:, k, :], d2.Y[:, k, :]]
# all variables at index k of axis 1: yOut (red), d1.y (grey), d2.y (black)
dataPlot = [arr[:, k, :] for arr in (yOut, d1.y, d2.y)]
cLst = ['red', 'grey', 'black']
fig, axes = figplot.multiTS(DF.t, dataPlot, cLst=cLst)
fig.show()

# correlation between yOut and d2.y for every variable in varY
# (the return value of calCorr is not kept)
for k in range(len(varY)):
    utils.stat.calCorr(yOut[:, 0, k], d2.y[:, 0, k])

# weight and bias of the model's linearOut layer as numpy arrays
w, b = [
    model.linearOut._parameters[name].detach().cpu().numpy()
    for name in ('weight', 'bias')
]

# rows of w, with the columns ordered by ascending value of row 0;
# row 0 is drawn in black, then one further row per entry of codeSel
fig, ax = plt.subplots(1, 1)
ind = np.argsort(w[0])
wSort = w[:, ind]
ax.plot(wSort[0], 'k-')
for k, code in enumerate(codeSel):
    ax.plot(wSort[k + 1], '-')
fig.show()

# correlation between the rows of w, in percent
fig, ax = plt.subplots(1, 1)
axplot.plotHeatMap(ax, np.corrcoef(w) * 100, varY)
fig.show()
Пример #5
0
# data models of the training and the testing subset, targets = codeSel
d1 = dbBasin.DataModelBasin(DF, subset=trainSet, varY=codeSel)
d2 = dbBasin.DataModelBasin(DF, subset=testSet, varY=codeSel)

# one multi-panel time series figure per site of DF:
# yW (blue), yP (red), d1.Y (grey), d2.Y (black)
cLst = ['blue', 'red', 'grey', 'black']
for k in range(len(DF.siteNoLst)):
    dataPlot = [arr[:, k, :] for arr in (yW, yP, d1.Y, d2.Y)]
    fig, axes = figplot.multiTS(DF.t, dataPlot, labelLst=labelLst, cLst=cLst)
    fig.show()

# correlation against d2.Y for every (site, code) pair:
# mat1 holds the values for yP, mat2 the values for yW
mat1 = np.empty([len(siteNoLst), len(codeSel)])
mat2 = np.empty_like(mat1)
for indS, siteNo in enumerate(siteNoLst):
    for indC, code in enumerate(codeSel):
        yObs = d2.Y[:, indS, indC]
        corr1 = utils.stat.calCorr(yP[:, indS, indC], yObs)
        corr2 = utils.stat.calCorr(yW[:, indS, indC], yObs)
        mat1[indS, indC] = corr1
        mat2[indS, indC] = corr2

# both matrices as heat maps in percent, on the same colour range
for matPlot in (mat1, mat2):
    fig, ax = plt.subplots(1, 1)
    axplot.plotHeatMap(ax,
                       matPlot * 100,
                       labLst=[siteNoLst, codeSel],
                       vRange=[70, 90])
    fig.show()
Пример #6
0
c = wqData.c
varC = wqData.varC
varNameLst = usgs.codePdf.loc[varC]['shortName'].tolist()
nc = c.shape[1]

# calculate all at once: correlation between every pair of columns of c,
# each pair being passed through utils.rmNan first
matCorr = np.full([nc, nc], np.nan)
for j in range(nc):
    for i in range(nc):
        (a, b), kk = utils.rmNan([c[:, j], c[:, i]])
        if len(kk) == 0:
            # nothing returned by rmNan for this pair: entry stays NaN
            continue
        matCorr[j, i] = np.corrcoef(a, b)[0, 1]

importlib.reload(axplot)
fig, ax = plt.subplots()
axplot.plotHeatMap(ax, matCorr * 100, varNameLst)
fig.tight_layout()
fig.show()

# calculate site by site: same computation, restricted to the rows of
# wqData.c that belong to one site; the result goes into slice k of the
# last axis
ns = len(siteNoLst)
matCorrAll = np.full([nc, nc, ns], np.nan)
for k, siteNo in enumerate(siteNoLst):
    # index labels of wqData.info are used as row selector of wqData.c
    ind = wqData.info[wqData.info['siteNo'] == siteNo].index
    c = wqData.c[ind]
    for j in range(nc):
        for i in range(nc):
            (a, b), kk = utils.rmNan([c[:, j], c[:, i]])
            if len(kk) == 0:
                continue
            matCorrAll[j, i, k] = np.corrcoef(a, b)[0, 1]
Пример #7
0
    dictSite = json.load(f)
ep = 500
reTest = True
siteNoLst = dictSite['comb']
nSite = len(siteNoLst)
dataName = 'rbWN5'
wqData = waterQuality.DataModelWQ(dataName)

codeLst = sorted(usgs.newC)
info = wqData.info

# column of wqData.c that belongs to each code of codeLst
icLst = [wqData.varC.index(code) for code in codeLst]

# out[k, j]: among the rows of wqData.c that belong to the sites listed in
# dictSite[code k] and have a non-NaN value for code k, the number of non-NaN
# values of code j divided by the number of non-NaN values of code k
out = np.empty([len(codeLst), len(codeLst)])
for k, (code, ic) in enumerate(zip(codeLst, icLst)):
    siteNoCode = dictSite[code]
    bs = info['siteNo'].isin(siteNoCode)
    bv = ~np.isnan(wqData.c[:, ic])
    # index labels of info are used as row positions of wqData.c
    ind = info.index[bs & bv].values
    mat = wqData.c[ind, :]
    count = np.sum(~np.isnan(mat), axis=0)
    n = count[ic]
    countP = count / n
    # reorder the columns from wqData.varC order to codeLst order
    out[k, :] = countP[icLst]

fig, ax = plt.subplots(1, 1)
axplot.plotHeatMap(ax, out * 100, codeLst)
fig.show()
Пример #8
0
import pandas as pd
import numpy as np
import os
import time
import scipy
import json

# all gages
dirInv = os.path.join(kPath.dirData, 'USGS', 'inventory')
fileSiteNo = os.path.join(dirInv, 'siteNoLst-1979')
siteNoLstAll = pd.read_csv(fileSiteNo, header=None, dtype=str)[0].tolist()
codeLst = sorted(usgs.codeLst)
# daily and weekly count matrices stored next to the site list
countMatD, countMatW = [
    np.load(os.path.join(dirInv, fileName))
    for fileName in ('matCountDaily.npy', 'matCountWeekly.npy')
]

ny = 3
# thresholds: 5 ... 19, each multiplied by ny
nsLst = np.arange(5, 20) * ny
# nsLst = [20, 24, 28, 32, 36, 40, 44, 45,
#          46, 47, 48, 52, 56, 60, 64, 68, 72, 76]

# outMat[i, j]: number of entries along axis 0 of countMatW whose weekly
# count of code i, summed over the last ny entries of axis 1, reaches at
# least nsLst[j]
outMat = np.empty([len(codeLst), len(nsLst)])
for i, code in enumerate(codeLst):
    ic = codeLst.index(code)
    count = np.sum(countMatW[:, -ny:, ic], axis=1)
    outMat[i, :] = [np.sum(count >= ns) for ns in nsLst]

# plot
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
axplot.plotHeatMap(ax, outMat, labLst=[codeLst, nsLst])
fig.show()
Пример #9
0
            corrMat[iS, iT] = corr
            rmseMat[iS, iT] = rmse

    dfG = gageII.readData(varLst=gageII.varLst, siteNoLst=siteNoLst)
    dfG = gageII.updateCode(dfG)

    pMat = dfG.values
    dfS = DGSA.DGSA_light(pMat,
                          corrMat[:, 1:2],
                          ParametersNames=dfG.columns.tolist(),
                          n_clsters=3)
    dfP[code] = dfS

importlib.reload(axplot)
# order the columns (one per code) before plotting
dfP = dfP.sort_index(axis=1)
# column labels: "<shortName> <code>"
labX = [
    '{} {}'.format(usgs.codePdf.loc[code]['shortName'], code)
    for code in dfP.columns.tolist()
]

# row labels come from the index of dfP, column labels from labX
labLst = [dfP.index.tolist(), labX]
fig, ax = plt.subplots()

# values are printed with two decimals on a colour range of 0 to 3
ax = axplot.plotHeatMap(ax,
                        dfP.values,
                        fmt='{:.2f}',
                        labLst=labLst,
                        vRange=[0, 3])
fig.tight_layout()
fig.show()