def testWgtCopula(self):
    """!
    @brief Check that a copula can be fit to unequally weighted samples.

    Two bivariate gauss models are set up, one with positive and one with
    negative dependence, and both are sampled.  The positively dependent
    samples get a large weight, the negatively dependent ones a small
    weight.  The pooled, weighted samples form a single "X" shaped
    distribution which is refit; the fitted copula must show positive
    dependence.
    """
    # gaussian margins, constructed with arguments (1e-3, 1.) and (1e-3, 2.)
    margin_a = Uvm("gauss")(1e-3, 1.)
    margin_b = Uvm("gauss")(1e-3, 2.)

    # gaussian copula with positive dependence parameter
    pos_cop = Copula("gauss")
    pos_cop.fittedParams = [0.7]

    # gaussian copula with negative dependence parameter
    neg_cop = Copula("gauss")
    neg_cop.fittedParams = [-0.7]

    # the same marginal draws are pushed through both copulas
    n_draws = 1000
    draws_a = margin_a.rvs(size=n_draws)
    draws_b = margin_b.rvs(size=n_draws)
    x_pos, y_pos = pos_cop.sampleScale(draws_a, draws_b,
                                       margin_a.cdf, margin_b.cdf)
    x_neg, y_neg = neg_cop.sampleScale(draws_a, draws_b,
                                       margin_a.cdf, margin_b.cdf)

    # pool both sample groups; 0.95 weight on the positive group and
    # 0.05 on the negative group, in the same order as the samples
    x = np.append(x_pos, x_neg)
    y = np.append(y_pos, y_neg)
    wgts = np.repeat([0.95, 0.05], n_draws)

    # plot the weighted input data
    original_frame = pd.DataFrame([x, y]).transpose()
    matrixPairPlot(original_frame, weights=wgts,
                   savefig='x_gauss_original.png')

    # fit copula to the weighted data
    copModel = PairCopula(x, y, wgts)
    copModel.copulaTournament()

    # the winning copula must have positive dependence with a
    # parameter somewhat below 0.7
    fitted = copModel.copulaModel
    x_wt, y_wt = fitted.sampleScale(draws_a, draws_b,
                                    margin_a.cdf, margin_b.cdf)
    self.assertTrue(fitted.kTau() > 0.)
    dep_param = fitted.fittedParams[0]
    self.assertTrue((dep_param > 0.) & (dep_param < 0.7))

    # plot samples drawn from the fitted model
    fitted_frame = pd.DataFrame([x_wt, y_wt]).transpose()
    matrixPairPlot(fitted_frame, savefig='x_gauss_weighted_fit.png')
def testWgtResampledCopula(self):
    """!
    @brief Check that a copula can be fit to unequally weighted samples
    when PairCopula is asked to resample (resample=10).
    """
    # fixed seed set before any sampling
    np.random.seed(123)

    # frozen gaussian margins: loc=1e-3 for both, scale 1.0 and 2.0
    margin_a = norm(loc=1e-3, scale=1.0)
    margin_b = norm(loc=1e-3, scale=2.0)

    # gaussian copula with positive dependence parameter
    pos_cop = Copula("gauss")
    pos_cop.fittedParams = [0.7]

    # gaussian copula with negative dependence parameter
    neg_cop = Copula("gauss")
    neg_cop.fittedParams = [-0.7]

    # 1000 samples from each model
    n_draws = 1000
    x_pos, y_pos = pos_cop.sampleScale(margin_a, margin_b, n_draws)
    x_neg, y_neg = neg_cop.sampleScale(margin_a, margin_b, n_draws)

    # pool both sample groups; 0.95 weight on the positive group and
    # 0.05 on the negative group, in the same order as the samples
    x = np.append(x_pos, x_neg)
    y = np.append(y_pos, y_neg)
    wgts = np.repeat([0.95, 0.05], n_draws)

    # fit copula to the weighted data with resampling enabled
    copModel = PairCopula(x, y, wgts, resample=10)
    copModel.copulaTournament()

    # plot the data held by the model after fitting
    resampled_frame = pd.DataFrame([copModel.x, copModel.y]).transpose()
    matrixPairPlot(resampled_frame, savefig='x_gauss_resampled.png')

    # the winning copula must have positive dependence with a
    # parameter somewhat below 0.7
    fitted = copModel.copulaModel
    x_wt, y_wt = fitted.sampleScale(margin_a, margin_b, n_draws)
    self.assertTrue(fitted.kTau() > 0.)
    dep_param = fitted.fittedParams[0]
    self.assertTrue((dep_param > 0.) & (dep_param < 0.7))

    # plot samples drawn from the fitted model
    fitted_frame = pd.DataFrame([x_wt, y_wt]).transpose()
    matrixPairPlot(fitted_frame, savefig='x_gauss_resampled_fit.png')
def testCvineConstruct(self):
    """!
    @brief Build, plot and sample a C-vine from five columns of the
    stocks data set.  Only produces plots; makes no assertions.
    """
    stocks = np.loadtxt(dataDir + 'stocks.csv', delimiter=',')

    # label -> column index in stocks.csv, in table column order
    column_map = (('1a', 0), ('2b', 1), ('3c', 4), ('4d', 5), ('5e', 6))
    tstData = pd.DataFrame(
        {label: stocks[:, col] for label, col in column_map})

    # raw multivariate data
    matrixPairPlot(tstData, savefig="quad_varaite_ex.png")

    # rank-transformed data, divided by (number of rows + 1)
    n_rows = len(tstData)
    ranked_data = tstData.dropna().rank() / (n_rows + 1)
    matrixPairPlot(ranked_data, savefig="quad_varaite_ranked_ex.png")

    # fit the vine to the ranked data and draw its graph
    tstVine = Cvine(ranked_data)
    tstVine.constructVine()
    tstVine.plotVine(savefig="c_vine_graph_ex.png")

    # draw from the fitted vine and plot the draws
    vine_draws = tstVine.sample(n=8000)
    matrixPairPlot(vine_draws, savefig="quad_varaite_resampled_ex.png")
def main():
    """Fit C-vines to two zone ranges of the HDF5 data set and resample them.

    Reads the Temperature, TurbulentKineticEnergy, BoundaryHeatFlux and
    Temperature_weights datasets from the file named in ``h5file`` and,
    for an "upper" selection (zones 72-73) and a "lower" selection
    (zone 70):

    * plots the weighted multivariate data through ``mvd.Mvd``,
    * constructs, plots and samples (n=500) a ``Cvine``,
    * plots the vine samples and the rank-transformed original data,
    * treats the sampled ``t`` and ``tke`` values as CDF values, inverts
      a gaussian-KDE CDF of the original data at those values, and plots
      the result against the original data.

    All output is written as PNG files; nothing is returned.  The HDF5
    file is closed even when a step fails.
    """

    def valid_values(node, zones):
        """Return the non-NaN entries of ``node.read()[:, zones]``."""
        # read once and reuse for both the data and the NaN mask
        block = node.read()[:, zones]
        return block[~np.isnan(block)]

    def icdf_uv_bisect(ux, X, marginalCDFModel, xtol, maxiter):
        """Invert ``marginalCDFModel(-inf, m)`` at every value in X.

        @param ux  original data; only used to bracket the root search
        @param X  iterable of target CDF values
        @param marginalCDFModel  callable ``(low, high)`` returning the
               probability mass between the two limits
        @param xtol  tolerance passed to the bisect stage
        @param maxiter  iteration cap passed to the bisect stage
        @return array with one root per entry of X; NaN where the root
                search failed
        """
        # search bracket: data range widened by half of |min| / |max|;
        # it does not depend on the target, so compute it once
        lower = np.min(ux) - np.abs(0.5 * np.min(ux))
        upper = np.max(ux) + np.abs(0.5 * np.max(ux))
        icdf = np.zeros(np.array(X).size)
        for i, xx in enumerate(X):
            def kde_cdf_err(m, target=xx):
                return target - marginalCDFModel(-np.inf, m)
            try:
                # coarse bisect first, then polish with newton
                guess = bisect(kde_cdf_err, lower, upper,
                               xtol=xtol, maxiter=maxiter)
                icdf[i] = newton(kde_cdf_err, guess, tol=1e-6, maxiter=20)
            except (ValueError, RuntimeError):
                # ValueError: bracket does not enclose a sign change;
                # RuntimeError: solver did not converge.
                icdf[i] = np.nan
        return icdf

    # read data from external h5 file
    h5file = 'Cicada_cfd_180x_cht.h5.post.binned.h5'
    store = pt.open_file(h5file)
    try:
        temperature = h5Load(store, "Water/UO2 [Interface 1]/Temperature")
        tke = h5Load(store, "Water/UO2 [Interface 1]/TurbulentKineticEnergy")
        weight = h5Load(store, "Water/UO2 [Interface 1]/Temperature_weights")
        bhf = h5Load(store, "Water/UO2 [Interface 1]/BoundaryHeatFlux")

        # ---------------------------------------------------------------
        # UPPER SPAN
        # ---------------------------------------------------------------
        zones = range(72, 74)
        temps = valid_values(temperature, zones)
        tkes = valid_values(tke, zones)
        bhfs = valid_values(bhf, zones)
        weights = valid_values(weight, zones)
        span_1_dataDict = {
            "Residual Temperature [K]": temps,
            "Residual TKE [J/kg]": tkes,
            "Residual BHF [W/m^2]": bhfs,
        }
        span_1_mvd = mvd.Mvd()
        span_1_mvd.setData(span_1_dataDict, weights)
        span_1_mvd.plot(savefig="upper_span.png", kde=False)

        # init Cvine
        print("================= Construct Upper Vine =================")
        upperData = pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs})
        # the vine gets its own frame, separate from upperData
        upperVine = Cvine(pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs}))
        upperVine.constructVine()
        upperVine.plotVine(savefig="upper_vine.png")
        print("========================================================")
        upperVineSamples = upperVine.sample(n=500)
        plt.figure(22)
        matrixPairPlot(upperVineSamples, savefig="upper_vine_samples.png")
        upper_ranked_data = upperData.dropna().rank() / (len(upperData) + 1)
        matrixPairPlot(upper_ranked_data, savefig="upper_ranked_samples.png")
        t_hat_vine = upperVineSamples['t']
        tke_hat_vine = upperVineSamples['tke']

        # resample original data through the KDE marginals
        kde_cdf = gaussian_kde(temps).integrate_box
        resampled_t = icdf_uv_bisect(temps, t_hat_vine, kde_cdf,
                                     xtol=1e-3, maxiter=15)
        kde_cdf = gaussian_kde(tkes).integrate_box
        resampled_tke = icdf_uv_bisect(tkes, tke_hat_vine, kde_cdf,
                                       xtol=1e-3, maxiter=15)
        bvc.bvJointPlot(resampled_t, resampled_tke, vs=[temps, tkes],
                        savefig="upper_t_tke_resampled.png")

        # ---------------------------------------------------------------
        # LOWER SPAN
        # ---------------------------------------------------------------
        zones = range(70, 71)
        temps = valid_values(temperature, zones)
        tkes = valid_values(tke, zones)
        bhfs = valid_values(bhf, zones)
        weights = valid_values(weight, zones)
        span_1_dataDict = {
            "Residual Temperature [K]": temps,
            "Residual TKE [J/kg]": tkes,
            "Residual BHF [W/m^2]": bhfs,
        }
        span_1_mvd = mvd.Mvd()
        span_1_mvd.setData(span_1_dataDict, weights)
        span_1_mvd.plot(savefig="lower_span.png", kde=False)

        # init Cvine
        print("================= Construct Lower Vine =================")
        lowerData = pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs})
        # note the column order handed to the vine: tke first, then t
        lowerVine = Cvine(pd.DataFrame({"tke": tkes, "t": temps, "q": bhfs}))
        lowerVine.constructVine()
        plt.figure(20)
        lowerVine.plotVine(savefig="lower_vine.png")
        print("========================================================")
        lowerVineSamples = lowerVine.sample(n=500)
        matrixPairPlot(lowerVineSamples, savefig="lower_vine_samples.png")
        lower_ranked_data = lowerData.dropna().rank() / (len(lowerData) + 1)
        matrixPairPlot(lower_ranked_data, savefig="lower_ranked_samples.png")
        t_hat_vine = lowerVineSamples['t']
        tke_hat_vine = lowerVineSamples['tke']

        # resample original data; the lower span uses a looser bisect stage
        kde_cdf = gaussian_kde(temps).integrate_box
        resampled_t = icdf_uv_bisect(temps, t_hat_vine, kde_cdf,
                                     xtol=1e-2, maxiter=10)
        kde_cdf = gaussian_kde(tkes).integrate_box
        resampled_tke = icdf_uv_bisect(tkes, tke_hat_vine, kde_cdf,
                                       xtol=1e-2, maxiter=10)
        bvc.bvJointPlot(resampled_t, resampled_tke, vs=[temps, tkes],
                        savefig="lower_t_tke_resampled.png")
    finally:
        # Clean up, also when a fit or plot above raised
        store.close()
def testCvineConstruct(self):
    """!
    @brief Build a C-vine from five columns of the stocks data set and
    check that samples drawn from it reproduce the correlation structure
    of the rank-transformed input.

    Pearson and Kendall correlation matrices of the vine samples must
    match those of the ranked data to within 0.10.  Samples rescaled
    through fitted beta marginals (via scaleSamples and via sampleScale)
    must match the ranked-data Pearson matrix to within 0.1.
    """
    def by_sorted_columns(frame):
        # reorder the columns alphabetically; rows are left as they are
        return frame.reindex(sorted(frame.columns), axis=1)

    stocks = np.loadtxt(dataDir + 'stocks.csv', delimiter=',')

    # label -> column index in stocks.csv, in table column order
    column_map = (('1a', 0), ('2b', 1), ('3c', 4), ('4d', 5), ('5e', 6))
    tstData = pd.DataFrame(
        {label: stocks[:, col] for label, col in column_map})

    # raw multivariate data
    matrixPairPlot(tstData, savefig="quad_varaite_ex.png")

    # rank-transformed data, divided by (number of rows + 1)
    n_rows = len(tstData)
    ranked_data = tstData.dropna().rank() / (n_rows + 1)
    matrixPairPlot(ranked_data, savefig="quad_varaite_ranked_ex.png")

    # fit the vine to the ranked data and draw its graph
    tstVine = Cvine(ranked_data)
    tstVine.constructVine()
    tstVine.plotVine(savefig="c_vine_graph_ex.png")

    # draw from the fitted vine
    c_vine_samples = tstVine.sample(n=8000)
    matrixPairPlot(c_vine_samples, savefig="vine_resampled_ex.png")

    # correlation matrices of the input and of the vine samples
    tst_rho_matrix = ranked_data.corr(method='pearson')
    tst_ktau_matrix = ranked_data.corr(method='kendall')
    sample_rho_matrix = c_vine_samples.corr(method='pearson')
    sample_ktau_matrix = c_vine_samples.corr(method='kendall')

    # sort by col labels
    tst_rho_matrix = by_sorted_columns(tst_rho_matrix)
    tst_ktau_matrix = by_sorted_columns(tst_ktau_matrix)
    sample_rho_matrix = by_sorted_columns(sample_rho_matrix)
    sample_ktau_matrix = by_sorted_columns(sample_ktau_matrix)

    rho_diff = tst_rho_matrix - sample_rho_matrix
    print("Original data corr matrix:")
    print(tst_rho_matrix)
    print("Vine sample corr matrix:")
    print(sample_rho_matrix)
    print("Diff:")
    print(rho_diff)
    self.assertTrue(np.allclose(rho_diff, 0, atol=0.10))
    ktau_diff = tst_ktau_matrix - sample_ktau_matrix
    self.assertTrue(np.allclose(ktau_diff, 0, atol=0.10))

    # fit a beta marginal to every column of the original data
    marginal_dict = {
        col_name: beta(*beta.fit(tstData[col_name]))
        for col_name in tstData.columns
    }

    # scale the existing samples through the marginals
    c_vine_scaled_samples_a = tstVine.scaleSamples(c_vine_samples,
                                                   marginal_dict)
    matrixPairPlot(c_vine_scaled_samples_a,
                   savefig="vine_varaite_resampled_scaled_a.png")
    # sample and scale in a single call
    c_vine_scaled_samples_b = tstVine.sampleScale(8000, marginal_dict)

    # compute correlation coeffs
    sample_scaled_rho_matrix_a = c_vine_scaled_samples_a.corr(
        method='pearson')
    sample_scaled_rho_matrix_b = c_vine_scaled_samples_b.corr(
        method='pearson')

    # check for consistency
    for scaled_rho in (sample_scaled_rho_matrix_a,
                       sample_scaled_rho_matrix_b):
        self.assertTrue(
            np.allclose(tst_rho_matrix - scaled_rho, 0, atol=0.1))
def main():
    """Fit and sample a C-vine for every zone in range(65, 98).

    Reads the Temperature_bounds, Temperature, TurbulentKineticEnergy,
    BoundaryHeatFlux and Temperature_weights datasets from the file named
    in ``h5file``.  For each zone the NaN entries are dropped, a
    ``Cvine`` is constructed on the (tke, t, q) columns, 500 samples are
    drawn and plotted to ``singlePinPlots/<lower>_<upper>_vine_samples.png``,
    and the sampled ``t`` / ``tke`` values are passed to
    ``icdf_uv_bisect`` together with gaussian-KDE CDFs of the original
    data.

    Nothing is returned.  The HDF5 file is closed even when a step fails.
    """

    def valid_column(table, zone):
        """Return the non-NaN entries of column ``zone`` of ``table``."""
        column = table[:, zone]
        return column[~np.isnan(column)]

    # read data from external h5 file
    h5file = 'Cicada_cfd_180x_cht.h5.post.binned.h5'
    store = pt.open_file(h5file)
    try:
        # Each dataset is read into memory once; the per-zone loop below
        # only slices these arrays instead of re-reading the file.
        bounds = h5Load(
            store, "Water/UO2 [Interface 1]/Temperature_bounds").read()
        temperature = h5Load(
            store, "Water/UO2 [Interface 1]/Temperature").read()
        tke = h5Load(
            store, "Water/UO2 [Interface 1]/TurbulentKineticEnergy").read()
        weight = h5Load(
            store, "Water/UO2 [Interface 1]/Temperature_weights").read()
        bhf = h5Load(
            store, "Water/UO2 [Interface 1]/BoundaryHeatFlux").read()

        # SPAN
        for zone in range(65, 98):
            zBounds = valid_column(bounds, zone)
            temps = valid_column(temperature, zone)
            tkes = valid_column(tke, zone)
            bhfs = valid_column(bhf, zone)
            weights = valid_column(weight, zone)

            span_1_dataDict = {
                "Residual Temperature [K]": temps,
                "Residual TKE [J/kg]": tkes,
                "Residual BHF [W/m^2]": bhfs,
            }
            # NOTE(review): the Mvd object is populated but never plotted
            # here; kept because setData's side effects are not visible
            # from this function - confirm whether it can be dropped.
            span_1_mvd = mvd.Mvd()
            span_1_mvd.setData(span_1_dataDict, weights)

            # requires exactly two non-NaN bounds for the zone
            upper_z, lower_z = zBounds
            bounds_label = str(lower_z) + "_" + str(upper_z)

            # Construct Cvine (column order: tke, t, q)
            lowerVine = Cvine(
                pd.DataFrame({"tke": tkes, "t": temps, "q": bhfs}))
            lowerVine.constructVine()

            # Sample Cvine
            lowerVineSamples = lowerVine.sample(n=500)
            matrixPairPlot(
                lowerVineSamples,
                savefig="singlePinPlots/" + bounds_label + "_vine_samples.png")
            t_hat_vine = lowerVineSamples['t']
            tke_hat_vine = lowerVineSamples['tke']

            # map the vine samples through the KDE marginals
            kde_cdf = gaussian_kde(temps).integrate_box
            resampled_t = icdf_uv_bisect(temps, t_hat_vine, kde_cdf)
            kde_cdf = gaussian_kde(tkes).integrate_box
            resampled_tke = icdf_uv_bisect(tkes, tke_hat_vine, kde_cdf)

            # TODO: grow crud at the resampled points
            # (crudModel = Mamba1d(len(resampled_t))) and compare the
            # resampled crud to the original crud result; that step will
            # need the CrudThickness / CrudBoronDensity datasets.
    finally:
        # Clean up, also when a fit or plot above raised
        store.close()