def testCvineConstruct(self):
    """Fit a C-vine to five columns of the stocks data and plot a resample.

    Plots are written for the raw table, the rank-transformed table, the
    constructed vine graph, and 8000 samples drawn from the fitted vine.
    """
    stocks = np.loadtxt(dataDir + 'stocks.csv', delimiter=',')

    # Assemble the pandas table column by column: (label, csv column index).
    column_map = (('1a', 0), ('2b', 1), ('3c', 4), ('4d', 5), ('5e', 6))
    tstData = pd.DataFrame()
    for label, csv_col in column_map:
        tstData[label] = stocks[:, csv_col]

    # Pair plot of the raw multivariate data.
    matrixPairPlot(tstData, savefig="quad_varaite_ex.png")

    # Pair plot of the ranks scaled by (number of rows + 1).
    n_rows = len(tstData)
    ranked_data = tstData.dropna().rank() / (n_rows + 1)
    matrixPairPlot(ranked_data, savefig="quad_varaite_ranked_ex.png")

    # Build the vine on the ranked data, then draw its graph.
    tstVine = Cvine(ranked_data)
    tstVine.constructVine()
    tstVine.plotVine(savefig="c_vine_graph_ex.png")

    # Draw from the fitted vine and plot the draws.
    samples = tstVine.sample(n=8000)
    matrixPairPlot(samples, savefig="quad_varaite_resampled_ex.png")
def _read_valid(node, zones):
    """Return the non-NaN values found in the ``zones`` columns of ``node``.

    ``node.read()`` is called exactly once so the values and the NaN mask
    are taken from the same array.

    Args:
        node: object exposing ``read()``; assumed to return a 2-D numpy
            array indexed as ``[row, zone]`` -- TODO confirm against h5Load.
        zones: column selector applied as ``[:, zones]``.

    Returns:
        The selected values with NaN entries removed by boolean masking.
    """
    values = node.read()[:, zones]
    return values[~np.isnan(values)]


def _plot_span_mvd(temps, tkes, bhfs, weights, savefig):
    """Load one span's data into an ``mvd.Mvd`` and save its plot."""
    span_dataDict = {
        "Residual Temperature [K]": temps,
        "Residual TKE [J/kg]": tkes,
        "Residual BHF [W/m^2]": bhfs,
    }
    span_mvd = mvd.Mvd()
    span_mvd.setData(span_dataDict, weights)
    span_mvd.plot(savefig=savefig, kde=False)


def _icdf_uv_bisect(ux, X, marginalCDFModel, xtol, maxiter):
    """Invert a marginal CDF at every value in ``X`` by root finding.

    For each ``xx`` in ``X`` the root of ``xx - marginalCDFModel(-inf, m)``
    is bracketed with ``bisect`` and then refined with ``newton``.

    Args:
        ux: original samples; the search bracket is
            ``[min - |0.5 * min|, max + |0.5 * max|]`` of these values.
        X: iterable of CDF values to invert.
        marginalCDFModel: callable ``(low, high)`` returning the probability
            mass between the two bounds.
        xtol: absolute tolerance passed to ``bisect``.
        maxiter: iteration cap passed to ``bisect``.

    Returns:
        numpy array with one entry per element of ``X``; entries whose root
        search failed are NaN.
    """
    icdf = np.zeros(np.array(X).size)
    # The bracket depends only on ux, so compute it once instead of per sample.
    lower = min(ux) - np.abs(0.5 * min(ux))
    upper = max(ux) + np.abs(0.5 * max(ux))
    for i, xx in enumerate(X):
        def kde_cdf_err(m, target=xx):
            return target - marginalCDFModel(-np.inf, m)

        try:
            root = bisect(kde_cdf_err, lower, upper, xtol=xtol,
                          maxiter=maxiter)
            icdf[i] = newton(kde_cdf_err, root, tol=1e-6, maxiter=20)
        except Exception:
            # Best effort: a failed root search yields NaN for this sample.
            # Unlike a bare ``except`` this lets KeyboardInterrupt and
            # SystemExit propagate, so a long run can still be aborted.
            icdf[i] = np.nan
    return icdf


def main():
    """Fit and resample C-vines for an upper and a lower span of zones.

    Opens the binned HDF5 post-processing file, loads the
    "Water/UO2 [Interface 1]" datasets, and for each span:

    1. plots the temperature / TKE / boundary-heat-flux data with ``mvd.Mvd``,
    2. constructs a ``Cvine`` on those three variables and plots it,
    3. draws 500 samples from the vine and plots them next to the
       rank-transformed original data,
    4. maps the sampled ``t`` and ``tke`` values back through the inverse
       of Gaussian-KDE marginal CDFs and plots them against the originals.

    All results are written as PNG files in the working directory. The
    file handle is closed even if a step raises.
    """
    # read data from external h5 file
    h5file = 'Cicada_cfd_180x_cht.h5.post.binned.h5'
    store = pt.open_file(h5file)
    try:
        # All seven datasets are still looked up; bounds, crud_thick and b10
        # are not consumed by the analysis below.
        bounds = h5Load(store, "Water/UO2 [Interface 1]/Temperature_bounds")
        temperature = h5Load(store, "Water/UO2 [Interface 1]/Temperature")
        tke = h5Load(store, "Water/UO2 [Interface 1]/TurbulentKineticEnergy")
        crud_thick = h5Load(store, "Water/UO2 [Interface 1]/CrudThickness")
        b10 = h5Load(store, "Water/UO2 [Interface 1]/CrudBoronDensity")
        weight = h5Load(store, "Water/UO2 [Interface 1]/Temperature_weights")
        bhf = h5Load(store, "Water/UO2 [Interface 1]/BoundaryHeatFlux")

        # ----------------------------- UPPER SPAN -----------------------------
        zones = range(72, 74)
        temps = _read_valid(temperature, zones)
        tkes = _read_valid(tke, zones)
        bhfs = _read_valid(bhf, zones)
        weights = _read_valid(weight, zones)
        _plot_span_mvd(temps, tkes, bhfs, weights, savefig="upper_span.png")

        # init Cvine
        print("================= Construct Upper Vine =================")
        upperData = pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs})
        # The vine gets its own frame so upperData stays untouched for the
        # ranked plot below.
        upperVine = Cvine(pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs}))
        upperVine.constructVine()
        upperVine.plotVine(savefig="upper_vine.png")
        print("========================================================")
        upperVineSamples = upperVine.sample(n=500)
        plt.figure(22)
        matrixPairPlot(upperVineSamples, savefig="upper_vine_samples.png")
        upper_ranked_data = upperData.dropna().rank() / (len(upperData) + 1)
        matrixPairPlot(upper_ranked_data, savefig="upper_ranked_samples.png")
        t_hat_vine = upperVineSamples['t']
        tke_hat_vine = upperVineSamples['tke']

        # Resample original data: invert the KDE marginal CDFs at the
        # values sampled from the vine.
        kde_cdf = gaussian_kde(temps).integrate_box
        resampled_t = _icdf_uv_bisect(temps, t_hat_vine, kde_cdf,
                                      xtol=1e-3, maxiter=15)
        kde_cdf = gaussian_kde(tkes).integrate_box
        resampled_tke = _icdf_uv_bisect(tkes, tke_hat_vine, kde_cdf,
                                        xtol=1e-3, maxiter=15)
        bvc.bvJointPlot(resampled_t, resampled_tke, vs=[temps, tkes],
                        savefig="upper_t_tke_resampled.png")

        # ----------------------------- LOWER SPAN -----------------------------
        zones = range(70, 71)
        temps = _read_valid(temperature, zones)
        tkes = _read_valid(tke, zones)
        bhfs = _read_valid(bhf, zones)
        weights = _read_valid(weight, zones)
        _plot_span_mvd(temps, tkes, bhfs, weights, savefig="lower_span.png")

        # init Cvine
        print("================= Construct Lower Vine =================")
        lowerData = pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs})
        # Column order differs from the upper span ("tke" first); this is
        # kept exactly as the vine was originally constructed.
        lowerVine = Cvine(pd.DataFrame({"tke": tkes, "t": temps, "q": bhfs}))
        lowerVine.constructVine()
        plt.figure(20)
        lowerVine.plotVine(savefig="lower_vine.png")
        print("========================================================")
        lowerVineSamples = lowerVine.sample(n=500)
        matrixPairPlot(lowerVineSamples, savefig="lower_vine_samples.png")
        lower_ranked_data = lowerData.dropna().rank() / (len(lowerData) + 1)
        matrixPairPlot(lower_ranked_data, savefig="lower_ranked_samples.png")
        t_hat_vine = lowerVineSamples['t']
        tke_hat_vine = lowerVineSamples['tke']

        # Resample original data (looser bisect settings than the upper span).
        kde_cdf = gaussian_kde(temps).integrate_box
        resampled_t = _icdf_uv_bisect(temps, t_hat_vine, kde_cdf,
                                      xtol=1e-2, maxiter=10)
        kde_cdf = gaussian_kde(tkes).integrate_box
        resampled_tke = _icdf_uv_bisect(tkes, tke_hat_vine, kde_cdf,
                                        xtol=1e-2, maxiter=10)
        bvc.bvJointPlot(resampled_t, resampled_tke, vs=[temps, tkes],
                        savefig="lower_t_tke_resampled.png")
    finally:
        # Clean up: release the HDF5 handle even when a step above failed.
        store.close()
def testCvineConstruct(self):
    """Fit a C-vine to the stocks data and check resampled correlations.

    The vine is constructed on the rank-transformed data. Pearson and
    Kendall correlation matrices of 8000 vine samples must agree with those
    of the ranked data to within 0.10. The samples are then scaled through
    beta marginals fitted to the raw columns (two code paths:
    ``scaleSamples`` and ``sampleScale``) and their Pearson matrices are
    checked against the ranked-data matrix to within 0.1.
    """
    def sorted_corr(frame, method):
        # Correlation matrix with its columns ordered by label.
        corr = frame.corr(method=method)
        return corr.reindex(columns=sorted(corr.columns))

    stocks = np.loadtxt(dataDir + 'stocks.csv', delimiter=',')

    # Assemble the pandas table column by column: (label, csv column index).
    tstData = pd.DataFrame()
    for label, csv_col in (('1a', 0), ('2b', 1), ('3c', 4), ('4d', 5),
                           ('5e', 6)):
        tstData[label] = stocks[:, csv_col]

    # Pair plot of the raw multivariate data.
    matrixPairPlot(tstData, savefig="quad_varaite_ex.png")

    # Pair plot of the ranks scaled by (number of rows + 1).
    n_rows = len(tstData)
    ranked_data = tstData.dropna().rank() / (n_rows + 1)
    matrixPairPlot(ranked_data, savefig="quad_varaite_ranked_ex.png")

    # Build the vine on the ranked data, then draw its graph.
    tstVine = Cvine(ranked_data)
    tstVine.constructVine()
    tstVine.plotVine(savefig="c_vine_graph_ex.png")

    # Draw from the fitted vine and plot the draws.
    c_vine_samples = tstVine.sample(n=8000)
    matrixPairPlot(c_vine_samples, savefig="vine_resampled_ex.png")

    # The ranked data and the vine samples should share their correlation
    # coefficients.
    tst_rho_matrix = sorted_corr(ranked_data, 'pearson')
    tst_ktau_matrix = sorted_corr(ranked_data, 'kendall')
    sample_rho_matrix = sorted_corr(c_vine_samples, 'pearson')
    sample_ktau_matrix = sorted_corr(c_vine_samples, 'kendall')

    rho_diff = tst_rho_matrix - sample_rho_matrix
    print("Original data corr matrix:")
    print(tst_rho_matrix)
    print("Vine sample corr matrix:")
    print(sample_rho_matrix)
    print("Diff:")
    print(rho_diff)

    rho_matches = np.allclose(rho_diff, 0, atol=0.10)
    self.assertTrue(rho_matches)
    ktau_matches = np.allclose(tst_ktau_matrix - sample_ktau_matrix, 0,
                               atol=0.10)
    self.assertTrue(ktau_matches)

    # Frozen beta distribution fitted to each raw column.
    marginal_dict = {
        col_name: beta(*beta.fit(tstData[col_name]))
        for col_name in tstData.columns
    }

    # Scale the existing samples, and separately sample-and-scale in one go.
    c_vine_scaled_samples_a = tstVine.scaleSamples(c_vine_samples,
                                                   marginal_dict)
    matrixPairPlot(c_vine_scaled_samples_a,
                   savefig="vine_varaite_resampled_scaled_a.png")
    c_vine_scaled_samples_b = tstVine.sampleScale(8000, marginal_dict)

    # Pearson matrices of both scaled sample sets.
    sample_scaled_rho_matrix_a = c_vine_scaled_samples_a.corr(
        method='pearson')
    sample_scaled_rho_matrix_b = c_vine_scaled_samples_b.corr(
        method='pearson')

    # Both must stay consistent with the ranked-data matrix.
    for scaled_rho in (sample_scaled_rho_matrix_a,
                       sample_scaled_rho_matrix_b):
        self.assertTrue(
            np.allclose(tst_rho_matrix - scaled_rho, 0, atol=0.1))