示例#1
0
    def testWgtCopula(self):
        """!
        @brief Test ability to construct copula
        given samples with unequal weights.

        Compose two bivariate gauss dists, one with
        positive and one with negative dependence.
        Sample from dists.
        Assign large sample weights to positive gauss
        and low sample weights to neg gauss.
        Combine weighted samples into a single "X" shaped distribution.
        Refit weighted samples and ensure positive dependence.

        Side effects: writes 'x_gauss_original.png' and, if the assertions
        pass, 'x_gauss_weighted_fit.png'.
        """
        # Seed the global NumPy RNG so that draws made through it are
        # reproducible; without a seed the fitted parameter asserted below
        # changes from run to run.
        np.random.seed(123)

        # construct gaussian margins; mu={1e-3, 1e-3}, sd={1.0, 2.0}
        marg1 = Uvm("gauss")(1e-3, 1.)
        marg2 = Uvm("gauss")(1e-3, 2.)

        # construct gaussian copula positive dep
        cop1 = Copula("gauss")
        cop1.fittedParams = [0.7]

        # construct gaussian copula neg dep
        cop2 = Copula("gauss")
        cop2.fittedParams = [-0.7]

        # draw 1000 samples from each margin and push the same draws
        # through both copula models
        n = 1000
        rvs1 = marg1.rvs(size=n)
        rvs2 = marg2.rvs(size=n)
        x1, y1 = cop1.sampleScale(rvs1, rvs2, marg1.cdf, marg2.cdf)
        x2, y2 = cop2.sampleScale(rvs1, rvs2, marg1.cdf, marg2.cdf)

        # assign weights to each gauss sample group: the positively
        # dependent group dominates (0.95 vs 0.05 per sample)
        cop1_wgts = np.ones(n) * 0.95
        cop2_wgts = np.ones(n) * 0.05

        # combine both gauss models into dbl gauss model
        x = np.append(x1, x2)
        y = np.append(y1, y2)
        wgts = np.append(cop1_wgts, cop2_wgts)

        # plot the pooled, weighted input data
        data = pd.DataFrame([x, y]).T
        matrixPairPlot(data, weights=wgts, savefig='x_gauss_original.png')

        # fit copula to weighted data
        copModel = PairCopula(x, y, wgts)
        copModel.copulaTournament()

        # verify that a positive dep copula was produced with a
        # dep parameter of slightly less than 0.7
        fittedCopula = copModel.copulaModel
        x_wt, y_wt = fittedCopula.sampleScale(rvs1, rvs2, marg1.cdf,
                                              marg2.cdf)
        depParam = fittedCopula.fittedParams[0]
        # Dedicated comparison asserts report the offending value on
        # failure, unlike assertTrue on a pre-computed boolean.
        self.assertGreater(fittedCopula.kTau(), 0.)
        self.assertGreater(depParam, 0.)
        self.assertLess(depParam, 0.7)

        # plot samples drawn from the fitted model
        data = pd.DataFrame([x_wt, y_wt]).T
        matrixPairPlot(data, savefig='x_gauss_weighted_fit.png')
示例#2
0
    def testWgtResampledCopula(self):
        """!
        @brief Fit a pair copula to unequally weighted samples through the
        resampling option and check that positive dependence is recovered.

        Samples from a gaussian copula with parameter 0.7 (weight 0.95 each)
        are pooled with samples from one with parameter -0.7 (weight 0.05
        each) and refit with PairCopula(..., resample=10).

        Side effects: writes 'x_gauss_resampled.png' and, if the assertions
        pass, 'x_gauss_resampled_fit.png'.
        """
        np.random.seed(123)

        # gaussian margins: loc=1e-3 for both, scale={1.0, 2.0}
        marg_a = norm(loc=1e-3, scale=1.0)
        marg_b = norm(loc=1e-3, scale=2.0)

        # gaussian copula with positive dependence
        pos_cop = Copula("gauss")
        pos_cop.fittedParams = [0.7]

        # gaussian copula with negative dependence
        neg_cop = Copula("gauss")
        neg_cop.fittedParams = [-0.7]

        # 1000 scaled draws from each model
        n_draws = 1000
        pos_x, pos_y = pos_cop.sampleScale(marg_a, marg_b, n_draws)
        neg_x, neg_y = neg_cop.sampleScale(marg_a, marg_b, n_draws)

        # pool both groups (flattened, positive group first) together with
        # their per-sample weights
        pooled_x = np.concatenate((np.ravel(pos_x), np.ravel(neg_x)))
        pooled_y = np.concatenate((np.ravel(pos_y), np.ravel(neg_y)))
        pooled_w = np.concatenate((np.full(n_draws, 0.95),
                                   np.full(n_draws, 0.05)))

        # fit copula to the weighted data using resampling
        fit = PairCopula(pooled_x, pooled_y, pooled_w, resample=10)
        fit.copulaTournament()

        # plot the data held by the fitted PairCopula
        matrixPairPlot(pd.DataFrame([fit.x, fit.y]).T,
                       savefig='x_gauss_resampled.png')

        # the winning copula must show positive dependence, with a first
        # parameter strictly between 0 and 0.7
        x_wt, y_wt = fit.copulaModel.sampleScale(marg_a, marg_b, n_draws)
        self.assertTrue(fit.copulaModel.kTau() > 0.)
        self.assertTrue(0. < fit.copulaModel.fittedParams[0] < 0.7)

        # plot samples drawn from the fitted model
        matrixPairPlot(pd.DataFrame([x_wt, y_wt]).T,
                       savefig='x_gauss_resampled_fit.png')
    def testCvineConstruct(self):
        """Build a C-vine from five columns of stocks.csv and sample from it.

        Writes pair plots of the raw data, the rank-transformed data and
        8000 vine samples, plus a plot of the vine itself. Nothing is
        asserted; the test passes if no step raises.
        """
        raw = np.loadtxt(dataDir + 'stocks.csv', delimiter=',')

        # (column label, column index in the csv) for the variables used
        picks = (('1a', 0), ('2b', 1), ('3c', 4), ('4d', 5), ('5e', 6))
        frame = pd.DataFrame()
        for label, col in picks:
            frame[label] = raw[:, col]

        # pair plot of the raw multivariate data
        matrixPairPlot(frame, savefig="quad_varaite_ex.png")

        # rank-transform: ranks of the NaN-free rows divided by
        # (number of rows in the full table + 1)
        n_rows = len(frame)
        ranked = frame.dropna().rank() / (n_rows + 1)
        matrixPairPlot(ranked, savefig="quad_varaite_ranked_ex.png")

        # build, construct and plot the vine
        vine = Cvine(ranked)
        vine.constructVine()
        vine.plotVine(savefig="c_vine_graph_ex.png")

        # draw from the vine and plot the draws
        draws = vine.sample(n=8000)
        matrixPairPlot(draws, savefig="quad_varaite_resampled_ex.png")
示例#4
0
def _valid_values(dataset, zones):
    """Return the non-NaN entries of the selected zone columns.

    @param dataset  object whose read() returns the full data array
                    (assumed 2-D, zones along axis 1 -- TODO confirm)
    @param zones    column indices to select
    @return 1-D array of the selected values with NaNs removed
    """
    # Read once and reuse the array for both the selection and the mask.
    values = dataset.read()[:, zones]
    return values[~np.isnan(values)]


def _icdf_uv_bisect(ux, X, marginalCDFModel, xtol=1e-3, maxiter=15):
    """Numerically invert a marginal CDF at each value in X.

    For every target xx in X, solve xx - marginalCDFModel(-inf, m) = 0 for
    m: a coarse bisection over a bracket derived from `ux` supplies the
    starting point for a Newton/secant refinement.

    @param ux                reference data; its min/max define the bracket
    @param X                 iterable of target CDF values
    @param marginalCDFModel  callable (low, high) -> probability mass
    @param xtol              absolute tolerance of the bisection stage
    @param maxiter           iteration cap of the bisection stage
    @return array with one inverted value per entry of X; NaN where the
            root search failed
    """
    icdf = np.zeros(np.array(X).size)
    try:
        # The bracket is loop-invariant: the data range widened by 50% of
        # the magnitude of each end point.
        lower = min(ux) - np.abs(0.5 * min(ux))
        upper = max(ux) + np.abs(0.5 * max(ux))
    except ValueError:
        # min()/max() of empty data: no bracket, nothing can be inverted.
        icdf[:] = np.nan
        return icdf

    for i, xx in enumerate(X):
        def cdf_err(m, target=xx):
            return target - marginalCDFModel(-np.inf, m)

        try:
            guess = bisect(cdf_err, lower, upper, xtol=xtol, maxiter=maxiter)
            icdf[i] = newton(cdf_err, guess, tol=1e-6, maxiter=20)
        except Exception:
            # Best effort: a failed root search (bad bracket, no
            # convergence, ...) marks this sample as NaN. Unlike a bare
            # `except:`, KeyboardInterrupt and SystemExit still propagate.
            icdf[i] = np.nan
    return icdf


def _plot_span_mvd(temps, tkes, bhfs, weights, savefig):
    """Plot temperature/TKE/BHF of one span as a weighted Mvd figure."""
    span_mvd = mvd.Mvd()
    span_mvd.setData(
        {
            "Residual Temperature [K]": temps,
            "Residual TKE [J/kg]": tkes,
            "Residual BHF [W/m^2]": bhfs,
        },
        weights)
    span_mvd.plot(savefig=savefig, kde=False)


def _plot_resampled_t_tke(temps, tkes, vine_samples, savefig, xtol, maxiter):
    """Map vine samples of 't' and 'tke' through the inverse KDE CDFs of the
    original data and plot them against the original data."""
    resampled_t = _icdf_uv_bisect(temps, vine_samples['t'],
                                  gaussian_kde(temps).integrate_box,
                                  xtol=xtol, maxiter=maxiter)
    resampled_tke = _icdf_uv_bisect(tkes, vine_samples['tke'],
                                    gaussian_kde(tkes).integrate_box,
                                    xtol=xtol, maxiter=maxiter)
    bvc.bvJointPlot(resampled_t,
                    resampled_tke,
                    vs=[temps, tkes],
                    savefig=savefig)


def main():
    """Fit, plot and resample C-vines for an upper and a lower span.

    Reads temperature, TKE, boundary heat flux and weights from the binned
    HDF5 file. For the upper span (zones 72-73) and the lower span
    (zone 70) it plots the weighted data, fits a C-vine, plots the vine,
    500 vine samples and the ranked data, and finally plots the vine
    samples mapped back through KDE-based inverse CDFs of temperature and
    TKE. All results are written as PNG files; the HDF5 file is closed even
    when a step fails.
    """
    # read data from external h5 file
    h5file = 'Cicada_cfd_180x_cht.h5.post.binned.h5'
    store = pt.open_file(h5file)
    try:
        temperature = h5Load(store, "Water/UO2 [Interface 1]/Temperature")
        tke = h5Load(store, "Water/UO2 [Interface 1]/TurbulentKineticEnergy")
        weight = h5Load(store, "Water/UO2 [Interface 1]/Temperature_weights")
        bhf = h5Load(store, "Water/UO2 [Interface 1]/BoundaryHeatFlux")

        # ---------------- UPPER SPAN ----------------
        zones = range(72, 74)
        temps = _valid_values(temperature, zones)
        tkes = _valid_values(tke, zones)
        bhfs = _valid_values(bhf, zones)
        weights = _valid_values(weight, zones)
        _plot_span_mvd(temps, tkes, bhfs, weights, savefig="upper_span.png")

        # init Cvine
        print("================= Construct Upper Vine =================")
        upperData = pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs})
        # The vine gets its own frame, separate from the one ranked below.
        upperVine = Cvine(pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs}))
        upperVine.constructVine()
        upperVine.plotVine(savefig="upper_vine.png")
        print("========================================================")
        upperVineSamples = upperVine.sample(n=500)
        plt.figure(22)
        matrixPairPlot(upperVineSamples, savefig="upper_vine_samples.png")
        upper_ranked_data = upperData.dropna().rank() / (len(upperData) + 1)
        matrixPairPlot(upper_ranked_data, savefig="upper_ranked_samples.png")

        # Resample original data
        _plot_resampled_t_tke(temps, tkes, upperVineSamples,
                              savefig="upper_t_tke_resampled.png",
                              xtol=1e-3, maxiter=15)

        # ---------------- LOWER SPAN ----------------
        zones = range(70, 71)
        temps = _valid_values(temperature, zones)
        tkes = _valid_values(tke, zones)
        bhfs = _valid_values(bhf, zones)
        weights = _valid_values(weight, zones)
        _plot_span_mvd(temps, tkes, bhfs, weights, savefig="lower_span.png")

        # init Cvine
        print("================= Construct Lower Vine =================")
        lowerData = pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs})
        # Column order passed to the vine differs from the upper span
        # (tke first); kept exactly as in the original script.
        lowerVine = Cvine(pd.DataFrame({"tke": tkes, "t": temps, "q": bhfs}))
        lowerVine.constructVine()
        plt.figure(20)
        lowerVine.plotVine(savefig="lower_vine.png")
        print("========================================================")
        lowerVineSamples = lowerVine.sample(n=500)
        matrixPairPlot(lowerVineSamples, savefig="lower_vine_samples.png")
        lower_ranked_data = lowerData.dropna().rank() / (len(lowerData) + 1)
        matrixPairPlot(lower_ranked_data, savefig="lower_ranked_samples.png")

        # Resample original data (coarser bisection stage than upper span)
        _plot_resampled_t_tke(temps, tkes, lowerVineSamples,
                              savefig="lower_t_tke_resampled.png",
                              xtol=1e-2, maxiter=10)
    finally:
        # Clean up
        store.close()
    def testCvineConstruct(self):
        """Fit a C-vine to five columns of stocks.csv and validate its samples.

        Checks that the Pearson and Kendall correlation matrices of 8000
        vine samples agree with those of the ranked input data to within
        0.10, and that samples scaled through fitted `beta` marginals (via
        scaleSamples and via sampleScale) keep Pearson correlations within
        0.1 of the ranked data. Several PNG plots are written on the way.
        """
        def sort_columns(matrix):
            # reorder columns (not rows) by sorted label
            return matrix.reindex(sorted(matrix.columns), axis=1)

        raw = np.loadtxt(dataDir + 'stocks.csv', delimiter=',')

        # (column label, column index in the csv) for the variables used
        frame = pd.DataFrame()
        for label, col in (('1a', 0), ('2b', 1), ('3c', 4), ('4d', 5),
                           ('5e', 6)):
            frame[label] = raw[:, col]

        # pair plot of the raw multivariate data
        matrixPairPlot(frame, savefig="quad_varaite_ex.png")

        # pair plot of the rank-transformed data
        ranked = frame.dropna().rank() / (len(frame) + 1)
        matrixPairPlot(ranked, savefig="quad_varaite_ranked_ex.png")

        # build, construct and plot the vine
        vine = Cvine(ranked)
        vine.constructVine()
        vine.plotVine(savefig="c_vine_graph_ex.png")

        # draw from the vine
        vine_draws = vine.sample(n=8000)
        matrixPairPlot(vine_draws, savefig="vine_resampled_ex.png")

        # correlation matrices of the input data and of the vine draws,
        # each with columns sorted by label
        data_rho = sort_columns(ranked.corr(method='pearson'))
        data_ktau = sort_columns(ranked.corr(method='kendall'))
        draw_rho = sort_columns(vine_draws.corr(method='pearson'))
        draw_ktau = sort_columns(vine_draws.corr(method='kendall'))

        print("Original data corr matrix:")
        print(data_rho)
        print("Vine sample corr matrix:")
        print(draw_rho)
        print("Diff:")
        print(data_rho - draw_rho)
        for reference, sampled in ((data_rho, draw_rho),
                                   (data_ktau, draw_ktau)):
            self.assertTrue(np.allclose(reference - sampled, 0, atol=0.10))

        # one frozen beta marginal per original (unranked) column
        marginals = {
            name: beta(*beta.fit(frame[name]))
            for name in frame.columns
        }

        # scale the existing draws through the marginals
        scaled_a = vine.scaleSamples(vine_draws, marginals)
        matrixPairPlot(scaled_a,
                       savefig="vine_varaite_resampled_scaled_a.png")

        # draw and scale in a single call
        scaled_b = vine.sampleScale(8000, marginals)

        # Pearson correlations of both scaled sample sets must stay close
        # to those of the ranked data
        scaled_rhos = [scaled.corr(method='pearson')
                       for scaled in (scaled_a, scaled_b)]
        for scaled_rho in scaled_rhos:
            self.assertTrue(
                np.allclose(data_rho - scaled_rho, 0, atol=0.1))
示例#6
0
def _valid_zone_values(values, zone):
    """Return the non-NaN entries of column `zone` of a 2-D array."""
    column = values[:, zone]
    return column[~np.isnan(column)]


def main():
    """Fit a C-vine per zone and resample temperature and TKE from it.

    For every zone in 65..97 the temperature, TKE and boundary heat flux
    values are extracted, a C-vine is fitted and sampled 500 times, the
    samples are pair-plotted into 'singlePinPlots/', and the vine samples
    of 't' and 'tke' are mapped through KDE-based inverse CDFs. The HDF5
    file is closed even when a step fails.
    """
    # read data from external h5 file
    h5file = 'Cicada_cfd_180x_cht.h5.post.binned.h5'
    store = pt.open_file(h5file)
    try:
        bounds = h5Load(store, "Water/UO2 [Interface 1]/Temperature_bounds")
        temperature = h5Load(store, "Water/UO2 [Interface 1]/Temperature")
        tke = h5Load(store, "Water/UO2 [Interface 1]/TurbulentKineticEnergy")
        crud_thick = h5Load(store, "Water/UO2 [Interface 1]/CrudThickness")
        b10 = h5Load(store, "Water/UO2 [Interface 1]/CrudBoronDensity")
        weight = h5Load(store, "Water/UO2 [Interface 1]/Temperature_weights")
        bhf = h5Load(store, "Water/UO2 [Interface 1]/BoundaryHeatFlux")

        # Read every dataset once up front. The file is not modified while
        # looping, so re-reading each array twice per zone was redundant.
        # All arrays are held in memory for the duration of the loop.
        bounds_all = bounds.read()
        temperature_all = temperature.read()
        tke_all = tke.read()
        crud_thick_all = crud_thick.read()
        b10_all = b10.read()
        bhf_all = bhf.read()
        weight_all = weight.read()

        # SPAN
        zones = range(65, 98)
        for zone in zones:
            zBounds = _valid_zone_values(bounds_all, zone)
            temps = _valid_zone_values(temperature_all, zone)
            tkes = _valid_zone_values(tke_all, zone)
            # Not used yet; presumably kept for the crud comparison planned
            # at the end of the loop body -- TODO confirm or remove.
            cruds = _valid_zone_values(crud_thick_all, zone)
            b10s = _valid_zone_values(b10_all, zone)
            bhfs = _valid_zone_values(bhf_all, zone)
            weights = _valid_zone_values(weight_all, zone)
            span_1_dataDict = {
                "Residual Temperature [K]": temps,
                "Residual TKE [J/kg]": tkes,
                "Residual BHF [W/m^2]": bhfs,
            }
            span_1_mvd = mvd.Mvd()
            span_1_mvd.setData(span_1_dataDict, weights)
            # Requires exactly two non-NaN bounds per zone.
            # NOTE(review): the first bound is bound to `upper_z`; confirm
            # the ordering of the bounds dataset. The label is built as
            # "<second>_<first>" and is part of the output file names.
            upper_z, lower_z = zBounds
            bounds_label = str(lower_z) + "_" + str(upper_z)
            # span_1_mvd.plot(savefig=bounds_label + "_span.png", kde=False)

            # Construct Cvine (the vine gets its own frame, with tke first)
            lowerData = pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs})
            lowerVine = Cvine(
                pd.DataFrame({"tke": tkes, "t": temps, "q": bhfs}))
            lowerVine.constructVine()

            # Sample Cvine
            lowerVineSamples = lowerVine.sample(n=500)
            matrixPairPlot(lowerVineSamples,
                           savefig="singlePinPlots/" + bounds_label +
                           "_vine_samples.png")
            ranked_data = lowerData.dropna().rank() / (len(lowerData) + 1)
            # matrixPairPlot(ranked_data, savefig="singlePinPlots/" + bounds_label + "_ranked_samples.png")
            t_hat_vine = lowerVineSamples['t']
            tke_hat_vine = lowerVineSamples['tke']

            # Map the vine samples through the inverse KDE CDFs of the
            # original temperature and TKE data.
            kde_cdf = gaussian_kde(temps).integrate_box
            resampled_t = icdf_uv_bisect(temps, t_hat_vine, kde_cdf)
            kde_cdf = gaussian_kde(tkes).integrate_box
            resampled_tke = icdf_uv_bisect(tkes, tke_hat_vine, kde_cdf)
            # bvc.bvJointPlot(resampled_t, resampled_tke, vs=[temps, tkes],
            #                 savefig="singlePinPlots/" + bounds_label + "_t_tke_resampled.png")

            # Grow crud at resampled points
            #crudModel = Mamba1d(len(resampled_t))

            # Compare resampled crud to original crud result
    finally:
        # Clean up
        store.close()