print "num_test:", len(X_test) kernel = RBF(input_dim=1, variance=1., lengthscale=1.) m = GPRegression(X_train, y_train, kernel) m.optimize() res = 100 pred_mean, pred_std = m.predict(X_test) plt.plot(X_test, pred_mean, 'b-') plt.plot(X_test, pred_mean + 2 * pred_std, 'b--') plt.plot(X_test, pred_mean - 2 * pred_std, 'b--') plt.plot(X_train, y_train, 'b.', markersize=3) plt.plot(X_test, y_test, 'r.', markersize=5) plt.grid(True) plt.xlabel(r"$X$") plt.ylabel(r"$y$") plt.savefig("gp_regression_data_fit.eps", bbox_inches='tight') plt.show() s = GaussianQuadraticTest(None) gradients = compute_gp_regression_gradients(y_test, pred_mean, pred_std) U_matrix, stat = s.get_statistic_multiple_custom_gradient(y_test[:, 0], gradients[:, 0]) num_test_samples = 10000 null_samples = bootstrap_null(U_matrix, num_bootstrap=num_test_samples) sns.distplot(null_samples, kde=False, norm_hist=True) plt.plot([stat, stat], [0, .012], 'black') plt.legend([r"$V_n$ test", r"Bootstrapped $B_n$"]) plt.xlabel(r"$V_n$") plt.ylabel(r"Frequency")
# filter out desired entries: start from an all-True mask
# (df[field] == df[field] holds everywhere except where field is NaN)
mask = (df[field] == df[field])
for k, v in conditions.items():
    mask &= (df[k] == v)
current = df.loc[mask]

# only use desired values of x_field; copy to avoid SettingWithCopyWarning
current = current.loc[current[x_field].isin(x_field_values)].copy()

# use ints on x-axis
current[x_field] = current[x_field].astype(int)

sns.set_style("whitegrid")
# DataFrame.sort was removed from pandas; sort_values is the current API
sns.boxplot(x=x_field, y=field, data=current.sort_values(x_field))
plt.xlabel(field_plot_names[x_field])
plt.ylabel(field_plot_names[field])
plt.tight_layout()

fname_base = os.path.splitext(fname)[0]
plt.savefig(fname_base + ".png", bbox_inches='tight')
plt.savefig(fname_base + ".eps", bbox_inches='tight')

# print info on number of trials
print(field)
print("Average number of trials: %d" % int(np.round(current.groupby(x_field).apply(len).mean())))
print(current.groupby(x_field).apply(len))
plt.show()
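# Hypothetical bindings for the free variables the snippet above assumes
# (df, field, x_field, x_field_values, conditions, field_plot_names, fname);
# all names and values here are illustrative only.
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

df = pd.read_csv("results.csv")            # hypothetical results table
field = "p_value"                          # column plotted on the y-axis
x_field = "sample_size"                    # column plotted on the x-axis
x_field_values = [100, 200, 500, 1000]     # x values to keep
conditions = {"test": "stein"}             # fixed experimental settings
field_plot_names = {"p_value": "p values", "sample_size": "sample size"}
fname = "pvalues_vs_n.png"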
def to_data_frame(arr, epsilon):
    # flatten per-epsilon result lists into long-format (epsilon, value) rows
    data = []
    for i, r in enumerate(arr):
        for evaluation in r:  # renamed from `eval` to avoid shadowing the builtin
            data.append([epsilon[i], evaluation])
    return DataFrame(data)

p_values = to_data_frame(p_values[::5], epsilon[::5])
# likelihood_evaluations = to_data_frame(likelihood_evaluations, epsilon)

plt.figure()
sns.boxplot(x=0, y=1, data=p_values, palette="BuGn_d")
plt.ylabel("p values")
plt.xlabel("epsilon")
plt.tight_layout()
plt.savefig('../../write_up/img/Heiko1.pdf')

plt.figure()
plt.plot(epsilon[::2], np.mean(likelihood_evaluations[::2], axis=1), 'g')
plt.ylabel("likelihood evaluations")
plt.xlabel("epsilon")
plt.tight_layout()
plt.savefig('../../write_up/img/Heiko2.pdf')

# alternative: both panels in one figure
# f, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 4), sharex=True)
# sns.boxplot(x=0, y=1, data=p_values, palette="BuGn_d", ax=ax1)
# ax1.set_ylabel("p values")
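# Toy illustration of the long format to_data_frame produces: one row per
# (epsilon, value) pair, epsilon in column 0 and the value in column 1,
# which is what the sns.boxplot(x=0, y=1, ...) calls above rely on.
toy = to_data_frame([[0.1, 0.2], [0.5, 0.4]], epsilon=[0.01, 0.05])
print(toy)
#       0    1
# 0  0.01  0.1
# 1  0.01  0.2
# 2  0.05  0.5
# 3  0.05  0.4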
# estimate the thinning needed to de-correlate the chain
def get_thinning(X, nlags=50):
    autocorrelation = acf(X, nlags=nlags, fft=True)
    # find the lag whose autocorrelation is closest to 0.5
    thinning = np.argmin(np.abs(autocorrelation - 0.5)) + 1
    return thinning, autocorrelation

def normal_mild_corr(N):
    # mildly correlated Metropolis-Hastings chain targeting a standard normal
    return metropolis_hastings(log_normal, chain_size=N, thinning=1,
                               x_prev=np.random.randn(), step=0.55)

X = normal_mild_corr(TEST_CHAIN_SIZE)
sgld_thinning, autocorr = get_thinning(X, 500)
print('thinning for sgld t-student simulation', sgld_thinning, autocorr[sgld_thinning])

X = normal_mild_corr(sgld_thinning * 100000)
X = X[::sgld_thinning]
r = acf(X, nlags=30)
print(r)

seaborn.set_style("whitegrid")
plt.plot(r)
plt.xlabel('lags')
plt.ylabel('auto correlation')
plt.ylim([0, 1])
plt.tight_layout()
plt.savefig('../write_up/img/sgld_lags.eps')
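# metropolis_hastings and log_normal are assumed in scope above; a minimal
# random-walk Metropolis sketch matching that call signature (the project's
# sampler may differ, e.g. in burn-in handling).
def metropolis_hastings(log_density, chain_size, thinning, x_prev, step):
    samples = np.zeros(chain_size)
    for i in range(chain_size):
        for _ in range(thinning):
            proposal = x_prev + step * np.random.randn()
            # accept with probability min(1, p(proposal) / p(x_prev))
            if np.log(np.random.rand()) < log_density(proposal) - log_density(x_prev):
                x_prev = proposal
        samples[i] = x_prev
    return samples

def log_normal(x):
    # unnormalised log-density of a standard normal target
    return -0.5 * x ** 2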
# GPy's predict returns the predictive variance; convert to a std. deviation
pred_mean, pred_var = m.predict(X_test)
pred_std = np.sqrt(pred_var)

# undo the input normalisation so the x-axis shows calendar years
X_test_plot = X_test[:, 0] * 116.502738394 + 1815.93213296

fig, ax = plt.subplots()
plt.plot(X_test_plot, pred_mean, "r-")

# shaded 2-sigma predictive band
lower = (pred_mean - 2 * pred_std)[:, 0]
upper = (pred_mean + 2 * pred_std)[:, 0]
plt.fill_between(X_test_plot, lower, upper, color="r", alpha=0.3)

plt.plot(X_train * 116.502738394 + 1815.93213296, y_train, "b.", markersize=3)
plt.plot(X_test_plot, y_test, "*", color="black", markersize=5)
plt.grid(True)
plt.xlabel(r"Year")
plt.ylabel(r"Solar activity (normalised)")
start, end = ax.get_xlim()
ax.xaxis.set_ticks(np.arange(start, end, 100))
plt.savefig("gp_regression_data_fit.eps", bbox_inches="tight")
plt.savefig("gp_regression_data_fit.pdf", bbox_inches="tight")

# quadratic-time Stein test of the predictive density on the test data
s = GaussianQuadraticTest(None)
gradients = compute_gp_regression_gradients(y_test, pred_mean, pred_std)
U_matrix, stat = s.get_statistic_multiple_custom_gradient(y_test[:, 0], gradients[:, 0])

num_test_samples = 10000
null_samples = bootstrap_null(U_matrix, num_bootstrap=num_test_samples)
print("p-value:", 1.0 - np.mean(null_samples <= stat))
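# bootstrap_null is assumed in scope above; a minimal multiplier-bootstrap
# sketch assuming i.i.d. data, where each draw recomputes the V-statistic
# w' U w / n with Rademacher signs w. The project's version may instead use
# a wild bootstrap process for dependent (MCMC) samples.
def bootstrap_null(U_matrix, num_bootstrap=1000):
    n = U_matrix.shape[0]
    samples = np.zeros(num_bootstrap)
    for b in range(num_bootstrap):
        w = np.sign(np.random.randn(n))  # i.i.d. Rademacher multipliers
        samples[b] = w.dot(U_matrix).dot(w) / n
    return samples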
import numpy as np
import seaborn
from pandas import DataFrame

from tools.latex_plot_init import plt

# each results array holds one row per trial:
# column 0 = degrees of freedom, column 1 = p value
results = np.load('results_good.npy')
df = DataFrame(results)

plt.figure()
seaborn.set_style("whitegrid")
seaborn.boxplot(x=0, y=1, data=df, palette="BuGn_d")
plt.ylabel('p values')
plt.ylim([0, 1])
plt.xlabel('degrees of freedom')
plt.tight_layout()  # call after the labels are set so nothing is clipped
plt.savefig('../write_up/img/sgld_student.pdf')

results = np.load('results_bad.npy')
df = DataFrame(results)

plt.figure()
seaborn.set_style("whitegrid")
seaborn.boxplot(x=0, y=1, data=df, palette="BuGn_d")
plt.ylabel('p values')
plt.ylim([0, 1])
plt.xlabel('degrees of freedom')
plt.tight_layout()
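# The scripts above assume results_{good,bad}.npy hold an (n, 2) array laid
# out as described; a dummy file in that layout can be generated to
# smoke-test the plotting code (values here are purely synthetic).
dof = np.repeat([1, 5, 10], 100)
dummy = np.column_stack([dof, np.random.rand(dof.size)])
np.save('results_good.npy', dummy)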