def testCvineConstruct(self):
        """Run the C-vine workflow end to end on five columns of the stocks data.

        Loads ``stocks.csv`` from ``dataDir``, hands the raw and the
        rank-transformed tables to ``matrixPairPlot``, constructs a ``Cvine``
        from the ranked table, plots the vine and finally draws 8000 samples
        from it.  No assertions are made: the test passes when every step
        runs without raising.
        """
        stocks = np.loadtxt(dataDir + 'stocks.csv', delimiter=',')
        # Create pandas data table from columns 0, 1, 4, 5 and 6 of the file
        tstData = pd.DataFrame({
            '1a': stocks[:, 0],
            '2b': stocks[:, 1],
            '3c': stocks[:, 4],
            '4d': stocks[:, 5],
            '5e': stocks[:, 6],
        })
        # Visualize multivar data
        matrixPairPlot(tstData, savefig="quad_varaite_ex.png")

        # Visualize multivar ranked data.  The ranks are divided by the number
        # of rows that survive dropna() (plus one); dividing by the length of
        # the unfiltered table would squeeze the values toward 0 whenever rows
        # are dropped.
        clean_data = tstData.dropna()
        ranked_data = clean_data.rank() / (len(clean_data) + 1)
        matrixPairPlot(ranked_data, savefig="quad_varaite_ranked_ex.png")

        # Init Cvine and construct it
        tstVine = Cvine(ranked_data)
        tstVine.constructVine()

        # plot vine
        tstVine.plotVine(savefig="c_vine_graph_ex.png")

        # sample from vine
        samples = tstVine.sample(n=8000)
        matrixPairPlot(samples, savefig="quad_varaite_resampled_ex.png")
# Example #2
0
def _finite_zone_values(values, zones):
    """Return the non-NaN entries of ``values[:, zones]`` as a flat array.

    Args:
        values: 2-D array already read into memory.
        zones: Column indices to select (any sequence numpy accepts as an
            index, e.g. a ``range``).
    """
    selected = values[:, zones]
    return selected[~np.isnan(selected)]


def _icdf_uv_bisect(ux, X, marginalCDFModel, xtol=1e-3, maxiter=15):
    """Numerically invert a univariate CDF for every entry of ``X``.

    For each ``xx`` in ``X`` the root ``m`` of
    ``xx - marginalCDFModel(-np.inf, m)`` is bracketed with ``bisect`` and
    then refined with ``newton``.

    Args:
        ux: Data the CDF model was built from; its minimum and maximum,
            widened by 50 % of their magnitude, form the search bracket.
        X: Target CDF values to invert (presumably probabilities in (0, 1);
            confirm against the caller).
        marginalCDFModel: Callable ``(low, high)`` returning the probability
            mass between the two bounds.
        xtol: Absolute tolerance passed to ``bisect``.
        maxiter: Iteration limit passed to ``bisect``.

    Returns:
        Array with one inverted value per entry of ``X``; entries for which
        the root search fails are ``nan``.
    """
    icdf = np.zeros(np.array(X).size)
    data = np.asarray(ux)
    if data.size == 0:
        # No bracket can be formed, so every inversion fails.
        icdf[:] = np.nan
        return icdf

    # The bracket only depends on the data, so compute it once.
    data_min, data_max = np.min(data), np.max(data)
    bracket_lo = data_min - np.abs(0.5 * data_min)
    bracket_hi = data_max + np.abs(0.5 * data_max)

    def cdf_err(m, target):
        return target - marginalCDFModel(-np.inf, m)

    for i, xx in enumerate(X):
        try:
            guess = bisect(cdf_err, bracket_lo, bracket_hi, args=(xx,),
                           xtol=xtol, maxiter=maxiter)
            icdf[i] = newton(cdf_err, guess, args=(xx,), tol=1e-6, maxiter=20)
        except (ValueError, RuntimeError):
            # bisect raises ValueError when the bracket does not straddle a
            # root; both solvers raise RuntimeError when they fail to
            # converge.  Such samples are marked as missing, best effort.
            icdf[i] = np.nan
    return icdf


def main():
    """Build, sample and plot C-vines for two groups of zones of the CFD data.

    Reads temperature, turbulent kinetic energy, boundary heat flux and
    weights from the binned HDF5 file, then, for the "upper" (zones 72-73)
    and "lower" (zone 70) spans:

    * plots the multivariate data with ``mvd.Mvd``,
    * constructs a ``Cvine`` and plots it,
    * draws 500 samples from the vine and plots them next to the ranked
      original data,
    * maps the sampled ``t`` and ``tke`` values back through the inverse of
      a Gaussian-KDE CDF of the original data and plots the result against
      the original data.

    All figures are written to PNG files in the working directory.
    """
    # read data from external h5 file
    h5file = 'Cicada_cfd_180x_cht.h5.post.binned.h5'
    store = pt.open_file(h5file)
    try:
        group = "Water/UO2 [Interface 1]/"
        # Read every dataset once instead of once per use.
        temperature = h5Load(store, group + "Temperature").read()
        tke = h5Load(store, group + "TurbulentKineticEnergy").read()
        weight = h5Load(store, group + "Temperature_weights").read()
        bhf = h5Load(store, group + "BoundaryHeatFlux").read()

        # NOTE(review): each variable is filtered with its own NaN mask; the
        # DataFrames built below need equal-length columns, so the NaN
        # patterns presumably coincide across variables - confirm.

        # ---------------------------- UPPER SPAN ----------------------------
        zones = range(72, 74)
        temps = _finite_zone_values(temperature, zones)
        tkes = _finite_zone_values(tke, zones)
        bhfs = _finite_zone_values(bhf, zones)
        weights = _finite_zone_values(weight, zones)
        span_1_dataDict = {
            "Residual Temperature [K]": temps,
            "Residual TKE [J/kg]": tkes,
            "Residual BHF [W/m^2]": bhfs,
        }
        span_1_mvd = mvd.Mvd()
        span_1_mvd.setData(span_1_dataDict, weights)
        span_1_mvd.plot(savefig="upper_span.png", kde=False)

        # init Cvine
        print("================= Construct Upper Vine =================")
        upperData = pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs})
        # The vine gets its own table in case Cvine modifies its input.
        upperVine = Cvine(pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs}))
        upperVine.constructVine()
        upperVine.plotVine(savefig="upper_vine.png")
        print("========================================================")
        upperVineSamples = upperVine.sample(n=500)
        plt.figure(22)
        matrixPairPlot(upperVineSamples, savefig="upper_vine_samples.png")
        # NaNs were already removed above, so no dropna() is needed here.
        upper_ranked_data = upperData.rank() / (len(upperData) + 1)
        matrixPairPlot(upper_ranked_data, savefig="upper_ranked_samples.png")

        # Resample original data: invert the KDE CDF at the vine samples
        resampled_t = _icdf_uv_bisect(temps, upperVineSamples['t'],
                                      gaussian_kde(temps).integrate_box,
                                      xtol=1e-3, maxiter=15)
        resampled_tke = _icdf_uv_bisect(tkes, upperVineSamples['tke'],
                                        gaussian_kde(tkes).integrate_box,
                                        xtol=1e-3, maxiter=15)
        bvc.bvJointPlot(resampled_t,
                        resampled_tke,
                        vs=[temps, tkes],
                        savefig="upper_t_tke_resampled.png")

        # ---------------------------- LOWER SPAN ----------------------------
        zones = range(70, 71)
        temps = _finite_zone_values(temperature, zones)
        tkes = _finite_zone_values(tke, zones)
        bhfs = _finite_zone_values(bhf, zones)
        weights = _finite_zone_values(weight, zones)
        span_1_dataDict = {
            "Residual Temperature [K]": temps,
            "Residual TKE [J/kg]": tkes,
            "Residual BHF [W/m^2]": bhfs,
        }
        span_1_mvd = mvd.Mvd()
        span_1_mvd.setData(span_1_dataDict, weights)
        span_1_mvd.plot(savefig="lower_span.png", kde=False)

        # init Cvine
        print("================= Construct Lower Vine =================")
        lowerData = pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs})
        # Column order handed to the vine differs from the upper span
        # ("tke" first); kept exactly as before.
        lowerVine = Cvine(pd.DataFrame({"tke": tkes, "t": temps, "q": bhfs}))
        lowerVine.constructVine()
        plt.figure(20)
        lowerVine.plotVine(savefig="lower_vine.png")
        print("========================================================")
        lowerVineSamples = lowerVine.sample(n=500)
        matrixPairPlot(lowerVineSamples, savefig="lower_vine_samples.png")
        lower_ranked_data = lowerData.rank() / (len(lowerData) + 1)
        matrixPairPlot(lower_ranked_data, savefig="lower_ranked_samples.png")

        # Resample original data (coarser bisection settings than above)
        resampled_t = _icdf_uv_bisect(temps, lowerVineSamples['t'],
                                      gaussian_kde(temps).integrate_box,
                                      xtol=1e-2, maxiter=10)
        resampled_tke = _icdf_uv_bisect(tkes, lowerVineSamples['tke'],
                                        gaussian_kde(tkes).integrate_box,
                                        xtol=1e-2, maxiter=10)
        bvc.bvJointPlot(resampled_t,
                        resampled_tke,
                        vs=[temps, tkes],
                        savefig="lower_t_tke_resampled.png")
    finally:
        # Clean up: release the HDF5 handle even when a step above fails
        store.close()
    def testCvineConstruct(self):
        """Check that a constructed C-vine reproduces the data's correlations.

        Builds a ``Cvine`` from the rank-transformed stocks data, draws 8000
        samples and asserts that the Pearson and Kendall correlation matrices
        of the samples agree with those of the ranked data to within 0.10.
        The samples are then scaled with beta marginals fitted to the
        original data, once via ``scaleSamples`` and once via
        ``sampleScale``, and the Pearson matrices are checked again with the
        same tolerance.
        """
        stocks = np.loadtxt(dataDir + 'stocks.csv', delimiter=',')
        # Create pandas data table from columns 0, 1, 4, 5 and 6 of the file
        tstData = pd.DataFrame({
            '1a': stocks[:, 0],
            '2b': stocks[:, 1],
            '3c': stocks[:, 4],
            '4d': stocks[:, 5],
            '5e': stocks[:, 6],
        })
        # Visualize multivar data
        matrixPairPlot(tstData, savefig="quad_varaite_ex.png")

        # Visualize multivar ranked data.  The ranks are divided by the number
        # of rows that survive dropna() (plus one); dividing by the length of
        # the unfiltered table would squeeze the values toward 0 whenever rows
        # are dropped.
        clean_data = tstData.dropna()
        ranked_data = clean_data.rank() / (len(clean_data) + 1)
        matrixPairPlot(ranked_data, savefig="quad_varaite_ranked_ex.png")

        # Init Cvine and construct it
        tstVine = Cvine(ranked_data)
        tstVine.constructVine()

        # plot vine
        tstVine.plotVine(savefig="c_vine_graph_ex.png")

        # sample from vine
        c_vine_samples = tstVine.sample(n=8000)
        matrixPairPlot(c_vine_samples, savefig="vine_resampled_ex.png")

        def sorted_by_column(corr_matrix):
            # Order the columns by label so printed matrices line up.
            return corr_matrix.reindex(sorted(corr_matrix.columns), axis=1)

        # check that the original data has same correlation coefficients as
        # re-sampled data from the fitted c-vine
        tst_rho_matrix = sorted_by_column(ranked_data.corr(method='pearson'))
        tst_ktau_matrix = sorted_by_column(ranked_data.corr(method='kendall'))
        sample_rho_matrix = sorted_by_column(
            c_vine_samples.corr(method='pearson'))
        sample_ktau_matrix = sorted_by_column(
            c_vine_samples.corr(method='kendall'))

        print("Original data corr matrix:")
        print(tst_rho_matrix)
        print("Vine sample corr matrix:")
        print(sample_rho_matrix)
        print("Diff:")
        print(tst_rho_matrix - sample_rho_matrix)
        self.assertTrue(
            np.allclose(tst_rho_matrix - sample_rho_matrix, 0, atol=0.10))
        self.assertTrue(
            np.allclose(tst_ktau_matrix - sample_ktau_matrix, 0, atol=0.10))

        # fit marginal distributions to the same NaN-free rows that were
        # ranked above
        marginal_dict = {
            col_name: beta(*beta.fit(clean_data[col_name]))
            for col_name in clean_data.columns
        }
        # scale the samples
        c_vine_scaled_samples_a = tstVine.scaleSamples(c_vine_samples,
                                                       marginal_dict)
        matrixPairPlot(c_vine_scaled_samples_a,
                       savefig="vine_varaite_resampled_scaled_a.png")

        c_vine_scaled_samples_b = tstVine.sampleScale(8000, marginal_dict)

        # compute correlation coeffs
        sample_scaled_rho_matrix_a = c_vine_scaled_samples_a.corr(
            method='pearson')
        sample_scaled_rho_matrix_b = c_vine_scaled_samples_b.corr(
            method='pearson')

        # check for consistency
        self.assertTrue(
            np.allclose(tst_rho_matrix - sample_scaled_rho_matrix_a,
                        0,
                        atol=0.1))
        self.assertTrue(
            np.allclose(tst_rho_matrix - sample_scaled_rho_matrix_b,
                        0,
                        atol=0.1))