def calculate_kld(data_1_fn,data_2_fn,names,n_samples=2000):
    """Compute the divergence between the KDEs of two data files.

    Both files are read with ``PyposmatDataFile``; the columns listed in
    ``names`` are selected from each file's ``df`` and transposed before
    being handed to a kernel density estimator.  If the covariance matrix
    of either data set has a negative eigenvalue, ``GaussianKde`` is used
    for both data sets; otherwise ``gaussian_kde`` is used for both.

    Args:
        data_1_fn (str): path to the first data file; must exist.
        data_2_fn (str): path to the second data file; must exist.
        names (list): column labels used to index each file's ``df``.
        n_samples (int): passed through as the third argument of
            ``kullbach_lieber_divergence``.

    Returns:
        The value returned by ``kullbach_lieber_divergence(kde_1, kde_2,
        n_samples)``.

    Raises:
        AssertionError: if an argument has the wrong type or either path
            is not an existing file.
    """
    assert isinstance(data_1_fn,str)
    assert isinstance(data_2_fn,str)
    assert isinstance(n_samples,int)
    # each path is checked on its own so that a missing second file is
    # reported here rather than failing later inside the reader
    assert os.path.isfile(data_1_fn)
    assert os.path.isfile(data_2_fn)

    data_1 = PyposmatDataFile()
    data_1.read(filename=data_1_fn)
    data_2 = PyposmatDataFile()
    data_2.read(filename=data_2_fn)

    # one row per name after the transpose; built once and reused below
    X_1 = data_1.df[names].T
    X_2 = data_2.df[names].T

    # only the eigenvalues are needed; the eigenvectors are discarded
    w1,_ = linalg.eig(np.cov(X_1))
    w2,_ = linalg.eig(np.cov(X_2))

    # np.real keeps the sign test valid when the solver hands back
    # complex-typed eigenvalues, for which a bare ``<`` raises TypeError
    cov1_ill_conditioned = bool(np.any(np.real(w1) < 0))
    cov2_ill_conditioned = bool(np.any(np.real(w2) < 0))

    if cov1_ill_conditioned or cov2_ill_conditioned:
        print('using ill-conditioned kde')
        kde_1 = GaussianKde(X_1)
        print(kde_1.n, kde_1.d)
        kde_2 = GaussianKde(X_2)
    else:
        kde_1 = gaussian_kde(X_1)
        kde_2 = gaussian_kde(X_2)

    return kullbach_lieber_divergence(kde_1,kde_2,n_samples)
def dev__kld_calculation_1d_kde():
    """Print the divergence between a 1-d KDE and a KDE of its own resample.

    Samples are drawn from ``norm(0,1)``, a ``gaussian_kde`` is fitted to
    them, that KDE is resampled, and a second ``gaussian_kde`` is fitted to
    the resample.  The result of ``kullbach_lieber_divergence`` for the two
    KDEs is printed; nothing is returned.
    """
    # sample sizes are named once here instead of repeating the literal
    n_samples_normal = 1000
    n_samples_kde = 1000
    n_samples_kld = 1000

    rv_norm = norm(0,1)
    X_norm = rv_norm.rvs(size=n_samples_normal)

    rv_kde_1 = gaussian_kde(X_norm)
    X_kde = rv_kde_1.resample(size=n_samples_kde)
    rv_kde_2 = gaussian_kde(X_kde)

    kld = kullbach_lieber_divergence(rv_kde_1,rv_kde_2,n_samples_kld)
    print(kld)
def test__kld_calculation_1d_kde():
    """Check the result of the divergence between two 1-d KDEs.

    A ``gaussian_kde`` is fitted to samples from ``norm(0,1)``, resampled,
    and a second ``gaussian_kde`` is fitted to the resample.  The test
    asserts that ``kullbach_lieber_divergence`` returns a tuple whose first
    element is positive.
    """
    n_samples_normal = 1000
    n_samples_kde = 1000
    n_samples_kld = 1000

    rv_norm = norm(0,1)
    X_norm = rv_norm.rvs(size=n_samples_normal)

    rv_kde_1 = gaussian_kde(X_norm)
    X_kde = rv_kde_1.resample(size=n_samples_kde)
    rv_kde_2 = gaussian_kde(X_kde)

    kld = kullbach_lieber_divergence(rv_kde_1,rv_kde_2,n_samples_kld)

    # isinstance also accepts tuple subclasses such as namedtuples
    assert isinstance(kld,tuple)
    # NOTE(review): this check used to be written twice in a row; if the
    # second copy was meant to cover another element of the tuple, add it.
    assert kld[0]>0
def dev__kld_calculation_1d_kde():
    """Print and return the divergence between two related 1-d KDEs.

    A banner with the function name is printed, then a ``gaussian_kde`` is
    fitted to samples from ``norm(0,1)``, resampled, and a second
    ``gaussian_kde`` is fitted to the resample.

    Returns:
        The value returned by ``kullbach_lieber_divergence`` for the two
        KDEs (also printed).

    Raises:
        AssertionError: if either fitted object is not a ``gaussian_kde``.
    """
    print(80*'-')
    print('{:^80}'.format('dev__kld_calculation_1d_kde'))
    print(80*'-')

    n_samples_normal = 1000
    n_samples_kde = 1000
    n_samples_kld = 1000

    rv_norm = norm(0,1)
    X_norm = rv_norm.rvs(size=n_samples_normal)

    rv_kde_1 = gaussian_kde(X_norm)
    X_kde = rv_kde_1.resample(size=n_samples_kde)
    rv_kde_2 = gaussian_kde(X_kde)

    # both estimators are checked, one assertion each
    assert isinstance(rv_kde_1,gaussian_kde)
    assert isinstance(rv_kde_2,gaussian_kde)

    kld = kullbach_lieber_divergence(rv_kde_1,rv_kde_2,n_samples_kld)
    print(kld)
    return kld
except FileNotFoundError as e: print("the number of max iterations is actually {}".format(i - 1)) n_iterations = i - 1 break # comparing kde estimates print('compare kde estimates') kld = [-1] for i in range(1, n_iterations): df_0 = data['kde'][i - 1].df df_1 = data['kde'][i].df df_0_p = df_0[config.parameter_names] df_1_p = df_1[config.parameter_names] nr0, nc0 = df_0_p.shape nr1, nc1 = df_1_p.shape #print('nrows:: {}={},{}={}'.format(i,nr0,i+1,nr1)) #print('ncols:: {}={},{}={}'.format(i,nc0,i+1,nc1)) X0 = df_0_p.values X1 = df_1_p.values #print('X0:',X0.shape,type(X0)) silverman86_h0 = Silverman1986_h(X0.T) silverman86_h1 = Silverman1986_h(X1.T) chiu99_h0 = Chiu1999_h(X0.T) chiu99_h1 = Chiu1999_h(X1.T) kde_0 = gaussian_kde(X0.T, chiu99_h0) kde_1 = gaussian_kde(X1.T, chiu99_h1) kld.append(kullbach_lieber_divergence(kde_0, kde_1, 400)) print(i, silverman86_h1, chiu99_h1, kld[i]) for i, v in enumerate(kld): print(i, v)
A = A * n * np.eye(n) return A n_samples_normal = 1000 n_samples_kde = 1000 rv_norm = norm(0, 1) X_norm = rv_norm.rvs(size=1000) print('X_norm', X_norm.shape) rv_kde_1 = gaussian_kde(X_norm) X_kde = rv_kde_1.resample(size=1000) print('X_kde', X_kde.shape) rv_kde_2 = gaussian_kde(X_kde) kld = kullbach_lieber_divergence(rv_kde_1, rv_kde_2, 1000) print(kld) exit() import matplotlib.pyplot as plt xmin = min(X_norm.min(), X_kde.min()) xmax = max(X_norm.max(), X_kde.max()) x = np.linspace(xmin, xmax, 1000) fig, ax = plt.subplots() ax.plot(x, rv_norm.pdf(x)) ax.plot(x, rv_kde.pdf(x)) plt.show()
# Script section: compare the KDE data files of consecutive iterations.
#
# Reads the configuration file named by test_case['Si__sw']['configuration_fn']
# and takes its free_parameter_names.  Inside the loop over
# range(n_kde_files), 'pyposmat.kde.{i}.out' and 'pyposmat.kde.{i+1}.out' are
# read from test_case['Si__sw']['data_directory'], a gaussian_kde is built
# from the free-parameter columns of each file, and the result of
# kullbach_lieber_divergence(kde_rv_1, kde_rv_2, 1000) is printed.
# dev__kld_calculation_1d_kde() is called as the last statement.
#
# NOTE(review): the original indentation is lost on this line, so it is not
# clear which statements belong to the `if i > 0:` guard (presumably
# everything through `print(kld)`) or where the loop body ends -- confirm
# against the formatted source before editing.
# NOTE(review): the loop reads file index i+1, so it presumably expects a
# file numbered n_kde_files to exist -- verify.
configuration_fn = test_case['Si__sw']['configuration_fn'] configuration = PyposmatConfigurationFile() configuration.read(filename=configuration_fn) free_parameter_names = configuration.free_parameter_names print('free_parameter_names:{}'.format(free_parameter_names)) for i in range(n_kde_files): if i > 0: print(80*'-') print('i_iteration:{}'.format(i)) data_directory = test_case['Si__sw']['data_directory'] kde_file_fn_1 = os.path.join(data_directory,'pyposmat.kde.{}.out'.format(i)) kde_file_fn_2 = os.path.join(data_directory,'pyposmat.kde.{}.out'.format(i+1)) print('kde_file_fn_1:{}'.format(kde_file_fn_1)) print('kde_file_fn_2:{}'.format(kde_file_fn_2)) kde_file_1 = PyposmatDataFile() kde_file_1.read(filename=kde_file_fn_1) kde_rv_1 = gaussian_kde(kde_file_1.df[free_parameter_names].T) kde_file_2 = PyposmatDataFile() kde_file_2.read(filename=kde_file_fn_2) kde_rv_2 = gaussian_kde(kde_file_2.df[free_parameter_names].T) kld = kullbach_lieber_divergence(kde_rv_1,kde_rv_2,1000) print(kld) dev__kld_calculation_1d_kde()