import pickle
from multiprocessing import Pool

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Project-local helpers referenced below (get_config_l, config_to_path,
# get_conv, get_as_idx, get_summary, get_F1s, sampler_to_post_dict,
# get_post_dict_cb_only, str_list_to_median_graph, _str_to_int_list,
# _get_basis_ct, _get_varying, the plot_true_posterior* functions, run_config,
# Graph, BETTER_NAMES, PERC_TO_IDX) are assumed to be imported from the
# surrounding package.


def compare_convergence(configs, basis_list=['edge', 'hub', 'uniform'],
                        burnin=0, thin=1, k=100, temper=None):
    """Tabulate the convergence diagnostic per true graph (rows) and basis (columns)."""
    nrows = len(configs['true_graph'])
    ncols = len(basis_list)
    data = np.zeros((nrows, ncols))
    for i, basis in enumerate(basis_list):
        configs['basis'] = basis
        config_l = get_config_l(configs)
        data[:, i] = [
            round(get_conv(c, burnin, thin, k=k, temper=temper), 2)
            for c in config_l
        ]
    basis_names = [BETTER_NAMES[s] for s in basis_list]
    columns = pd.MultiIndex.from_product([['convergence'], basis_names])
    indexes = [BETTER_NAMES[s] for s in configs['true_graph']]
    return pd.DataFrame(data, index=indexes, columns=columns)
def compare_as_idx(configs, basis_list=['edge', 'hub', 'uniform'],
                   burnin=0, thin=1, temper=None):
    """Tabulate the (start, end) index pairs from get_as_idx per true graph and basis."""
    nrows = len(configs['true_graph'])
    ncols = len(basis_list) * 2
    data = np.zeros((nrows, ncols))
    for i, basis in enumerate(basis_list):
        configs['basis'] = basis
        config_l = get_config_l(configs)
        idx_pairs = get_as_idx(config_l, burnin, thin, temper)
        data[:, i] = [tup[0] for tup in idx_pairs]
        data[:, len(basis_list) + i] = [tup[1] for tup in idx_pairs]
    basis_names = [BETTER_NAMES[s] for s in basis_list]
    columns = pd.MultiIndex.from_product(
        [['as_start_idx', 'as_end_idx'], basis_names])
    indexes = [BETTER_NAMES[s] for s in configs['true_graph']]
    return pd.DataFrame(data, index=indexes, columns=columns)
def compare_acceptance(configs, basis_list=['edge', 'hub', 'uniform'],
                       burnin=0, thin=1, temper=None):
    """Tabulate overall and tree-move acceptance rates per true graph and basis."""
    nrows = len(configs['true_graph'])
    ncols = len(basis_list) * 2
    data = np.zeros((nrows, ncols))
    for i, basis in enumerate(basis_list):
        configs['basis'] = basis
        config_l = get_config_l(configs)
        summ = [get_summary(c, burnin, thin, temper) for c in config_l]
        data[:, i] = [round(x['accept_rate'], 3) for x in summ]
        # Tree moves are attempted once every `cob_freq` iterations, so
        # normalize the acceptance count by the number of attempts.
        data[:, len(basis_list) + i] = [
            round(x['tree_accept_ct'] / (configs['iter'] / configs['cob_freq']), 3)
            for x in summ
        ]
    basis_names = [BETTER_NAMES[s] for s in basis_list]
    columns = pd.MultiIndex.from_product([['accept', 'accept_tree'], basis_names])
    indexes = [BETTER_NAMES[s] for s in configs['true_graph']]
    return pd.DataFrame(data, index=indexes, columns=columns)
def compare_F1s(configs, basis_list=['edge', 'hub', 'uniform'],
                burnin=0, thin=1, percentile=.5, temper=None):
    """Tabulate F1 scores at the given percentile per true graph and basis."""
    nrows = len(configs['true_graph'])
    ncols = len(basis_list)
    data = np.zeros((nrows, ncols))
    k = PERC_TO_IDX[percentile]
    for i, basis in enumerate(basis_list):
        configs['basis'] = basis
        config_l = get_config_l(configs)
        f1_l = [get_F1s(c, burnin, thin, temper) for c in config_l]
        data[:, i] = [round(x[k], 3) for x in f1_l]
    basis_names = [BETTER_NAMES[s] for s in basis_list]
    columns = pd.MultiIndex.from_product([['F1s'], basis_names])
    indexes = [BETTER_NAMES[s] for s in configs['true_graph']]
    return pd.DataFrame(data, index=indexes, columns=columns)
def compare_n_states(configs, basis_list=['edge', 'hub', 'uniform'],
                     burnin=0, thin=1, percentile=.5, temper=None):
    """Tabulate accepted vs. proposed state counts per true graph and basis."""
    nrows = len(configs['true_graph'])
    ncols = len(basis_list) * 2
    data = np.zeros((nrows, ncols))
    for i, basis in enumerate(basis_list):
        configs['basis'] = basis
        config_l = get_config_l(configs)
        summ = [get_summary(c, burnin, thin, temper) for c in config_l]
        data[:, i] = [x['states_visited'] for x in summ]
        data[:, len(basis_list) + i] = [x['states_considered'] for x in summ]
    basis_names = [BETTER_NAMES[s] for s in basis_list]
    columns = pd.MultiIndex.from_product(
        [['accepted_states', 'proposed_states'], basis_names])
    indexes = [BETTER_NAMES[s] for s in configs['true_graph']]
    return pd.DataFrame(data, index=indexes, columns=columns)
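# A minimal usage sketch for the compare_* family above. The helper name is
# hypothetical (not part of this module) and assumes `configs` is the same
# grid dict consumed by get_config_l, with 'n', 'n_obs', 'true_graph', 'iter',
# and 'cob_freq' keys. Because every compare_* table shares the same row index
# (the true graphs), the tables can be concatenated column-wise into one report.
def build_comparison_report(configs, burnin=0, thin=1, temper=None):
    tables = [
        compare_convergence(configs, burnin=burnin, thin=thin, temper=temper),
        compare_as_idx(configs, burnin=burnin, thin=thin, temper=temper),
        compare_acceptance(configs, burnin=burnin, thin=thin, temper=temper),
        compare_F1s(configs, burnin=burnin, thin=thin, temper=temper),
        compare_n_states(configs, burnin=burnin, thin=thin, temper=temper),
    ]
    # One wide DataFrame; the MultiIndex columns keep the metrics grouped.
    return pd.concat(tables, axis=1)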
def compare_median_graphs(configs, threshold=.5, how=None):
    """Plot the true graph next to the median graph of each sampler run."""
    config_l = get_config_l(configs)
    varying_k, varying_v = _get_varying(configs)
    paths = [config_to_path(c) for c in config_l]
    cols = len(paths)
    fig, axs = plt.subplots(1, cols + 1, figsize=(10 * (cols + 1), 10))
    n, n_obs, true_g = (config_l[0]['n'], config_l[0]['n_obs'],
                        config_l[0]['true_graph'])
    pos = Graph(n).GetCirclePos()
    with open(f"data/graph_{true_g}_{n}_{n_obs}.pkl", 'rb') as handle:
        g = pickle.load(handle)
    if how == 'circle':
        g.Draw(ax=axs[0], pos=pos)
    else:
        g.Draw(ax=axs[0])
    axs[0].set_title('true_graph', fontsize=20)
    for i in range(cols):
        with open(paths[i], 'rb') as handle:
            sampler = pickle.load(handle)
        adjm = str_list_to_median_graph(n, sampler.res['SAMPLES'],
                                        threshold=threshold)
        g_ = Graph(n)
        g_.SetFromAdjM(adjm)
        if how == 'circle':
            g_.Draw(ax=axs[i + 1], pos=pos)
        else:
            g_.Draw(ax=axs[i + 1])
        axs[i + 1].set_title(f"{varying_k}: {varying_v[i]}", fontsize=20)
    plt.show()
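# For reference, a minimal sketch of the median-graph construction that
# str_list_to_median_graph is assumed to perform; this reimplementation is
# illustrative, not the project's actual helper. It assumes each sample is a
# string of '0'/'1' indicators over the n*(n-1)/2 upper-triangular edges,
# matching how _str_to_int_list is used elsewhere in this file: an edge is
# kept when its posterior inclusion frequency exceeds `threshold`.
def _median_graph_sketch(n, samples, threshold=.5):
    # Per-edge inclusion frequency across all posterior samples.
    freqs = np.mean([[int(ch) for ch in s] for s in samples], axis=0)
    adjm = np.zeros((n, n), dtype=int)
    # Threshold the upper triangle, then symmetrize into a full adjacency matrix.
    adjm[np.triu_indices(n, k=1)] = freqs > threshold
    return adjm + adjm.T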
def compare_traces_short(configs, log=False, burnin=0):
    """Plot posterior, size, and basis-count traces from the '.short' summaries."""
    config_l = get_config_l(configs)
    varying_k, varying_v = _get_varying(configs)
    cols = len(config_l)
    post_traces = []
    size_traces = []
    basis_traces = []
    init_bases = []
    for c in config_l:
        with open(config_to_path(c)[:-4] + "_burnin-0.short", 'rb') as handle:
            sampler = pickle.load(handle)
        post_traces.append(sampler.posteriors)
        size_traces.append(sampler.sizes)
        basis_traces.append(sampler.bases)
        init_bases.append(sampler.last_params._basis)
    fig, axs = plt.subplots(3, cols, figsize=(10 * cols, 10 * 3))
    for i in range(cols):
        axs[0, i].plot(post_traces[i][burnin:])
        axs[1, i].plot(size_traces[i][burnin:])
        axs[2, i].plot(basis_traces[i][burnin:])
        axs[0, i].set_title(f"{varying_k}: {varying_v[i]}", fontsize=20)
    ylabs = ["MCMC posterior", "sizes", "n_basis"]
    for i, ylab in enumerate(ylabs):
        axs[i, 0].set_ylabel(ylab, rotation=90, fontsize=20)
    plt.show()
def compare_traces(configs, log=False, burnin=0):
    """Plot true posteriors and full MCMC traces per config; return the posteriors."""
    config_l = get_config_l(configs)
    varying_k, varying_v = _get_varying(configs)
    cols = len(config_l)
    # Union of all states visited by any run, so posteriors share one support.
    all_visited_states = set()
    for c in config_l:
        with open(config_to_path(c), 'rb') as handle:
            sampler = pickle.load(handle)
        all_visited_states |= set(np.unique(sampler.res['SAMPLES']))
    posts = []
    post_traces = []
    size_traces = []
    basis_traces = []
    init_bases = []
    for c in config_l:
        with open(config_to_path(c), 'rb') as handle:
            sampler = pickle.load(handle)
        post = sampler_to_post_dict(sampler, list(all_visited_states))
        if c['basis'] != 'edge':
            post = get_post_dict_cb_only(c['n'], post)
        posts.append(post)
        post_traces.append(np.array(sampler.res['LIK']) +
                           np.array(sampler.res['PRIOR']))
        size_traces.append(
            [np.sum(_str_to_int_list(s)) for s in sampler.res['SAMPLES']])
        basis_traces.append(_get_basis_ct(sampler))
        init_bases.append(sampler.last_params._basis)
    fig, axs = plt.subplots(3, cols + 1, figsize=(10 * (cols + 1), 10 * 3))
    for i in range(cols):
        plot_true_posterior(posts[i], log, ax=axs[0, 0],
                            label=f"{varying_k}: {varying_v[i]}")
        plot_true_posterior_edge_marginalized(
            posts[i], log, ax=axs[1, 0], label=f"{varying_k}: {varying_v[i]}")
        if config_l[i]['cob_freq'] is None and config_l[i]['basis'] != 'edge':
            basis = init_bases[i]
            # Reload this run's full sampler for the change-of-basis marginal
            # (indexing by config_l[i], not the stale loop variable `c`).
            with open(config_to_path(config_l[i]), 'rb') as handle:
                sampler = pickle.load(handle)
            plot_true_posterior_cb_marginalized(posts[i], basis, log,
                                                ax=axs[2, 0], sampler=sampler)
        axs[0, i + 1].plot(post_traces[i][burnin:])
        axs[1, i + 1].plot(size_traces[i][burnin:])
        axs[2, i + 1].plot(basis_traces[i][burnin:])
        axs[0, i + 1].set_title(f"{varying_k}: {varying_v[i]}", fontsize=20)
    axs[0, 0].legend()
    axs[1, 0].legend()
    ylabs = ["MCMC posterior", "sizes", "n_basis"]
    for i, ylab in enumerate(ylabs):
        axs[i, 0].set_ylabel(ylab, rotation=90, fontsize=20)
    plt.show()
    return posts
def plot_end(configs, basis_list=['edge', 'hub', 'uniform'], burnin=0, thin=1,
             plot=False, temper=None):
    """Histogram end-of-run jaccard/hamming/size distances per true graph and basis."""
    n, n_obs = configs['n'], configs['n_obs']
    n_graphs = len(configs['true_graph'])
    fig, axs = plt.subplots(n_graphs, 3, figsize=(3 * 10, n_graphs * 10))
    plt.rc('xtick', labelsize=30)
    plt.rc('ytick', labelsize=30)
    # Shared row and column labels.
    names = [BETTER_NAMES[s] for s in configs['true_graph']]
    for i in range(n_graphs):
        axs[i, 0].set_ylabel(names[i], size=50)
    axs[0, 0].set_title('jaccard', size=50)
    axs[0, 1].set_title('hamming', size=50)
    axs[0, 2].set_title('sizes', size=50)
    # First pass: find per-row axis ranges across all bases.
    jacc_max = [.0] * n_graphs
    hamm_max = [.0] * n_graphs
    size_max = [.0] * n_graphs
    for basis in basis_list:
        configs['basis'] = basis
        config_l = get_config_l(configs)
        summaries = [get_summary(c, burnin, thin, temper) for c in config_l]
        for i, summary in enumerate(summaries):
            # Pad empty distance lists with a zero so np.max is well-defined,
            # and log which run produced them.
            for key in ('jaccard_distances_end', 'hamming_distances_end',
                        'size_distances_end'):
                if len(summary[key]) == 0:
                    print(config_to_path(config_l[i]))
                    summary[key] = [0]
            jacc_max[i] = max(jacc_max[i],
                              np.max(summary['jaccard_distances_end']) * 100)
            hamm_max[i] = max(hamm_max[i],
                              np.max(summary['hamming_distances_end']))
            size_max[i] = max(size_max[i],
                              np.max(summary['size_distances_end']))
    # Second pass: draw overlaid histograms on the shared ranges.
    for basis in basis_list:
        configs['basis'] = basis
        config_l = get_config_l(configs)
        summaries = [get_summary(c, burnin, thin, temper) for c in config_l]
        for i, summary in enumerate(summaries):
            axs[i, 0].hist(summary['jaccard_distances_end'],
                           bins=np.arange(jacc_max[i] + 1) / 100,
                           label=BETTER_NAMES[basis], alpha=.5, density=True)
            axs[i, 1].hist(summary['hamming_distances_end'],
                           bins=np.arange(hamm_max[i] + 1),
                           label=BETTER_NAMES[basis], alpha=.5, density=True)
            axs[i, 2].hist(summary['size_distances_end'],
                           bins=np.arange(size_max[i] + 1),
                           label=BETTER_NAMES[basis], alpha=.5, density=True)
            axs[i, 0].legend(fontsize=30)
            axs[i, 1].legend(fontsize=30)
            axs[i, 2].legend(fontsize=30)
    fig.savefig(f"as_end_distr_n-{n}_n_obs-{n_obs}.pdf")
    if plot:
        plt.show()
    return fig
def plot_distances(configs, basis_list=['edge', 'hub', 'uniform'], burnin=0,
                   thin=1, uniq=False, proposed=False, plot=False,
                   y_ax_scale=1, temper=None):
    """Histogram jaccard/hamming/size distances per true graph and basis.

    `uniq` selects the unique-state variants and `proposed` the proposed-state
    variants of each distance list; together they map onto the summary-key
    suffixes '', '_uniq', '_', and '_uniq_'.
    """
    n, n_obs = configs['n'], configs['n_obs']
    n_graphs = len(configs['true_graph'])
    # Resolve the summary-key suffix and output-file tag once, instead of
    # branching on (uniq, proposed) at every use site.
    suffix = {(False, False): '', (True, False): '_uniq',
              (False, True): '_', (True, True): '_uniq_'}[(uniq, proposed)]
    tag = {'': '', '_uniq': '_u', '_': '_p', '_uniq_': '_u_p'}[suffix]
    fig, axs = plt.subplots(n_graphs, 3, figsize=(3 * 10, n_graphs * 10))
    plt.rc('xtick', labelsize=30)
    plt.rc('ytick', labelsize=30)
    # Shared row and column labels.
    names = [BETTER_NAMES[s] for s in configs['true_graph']]
    for i in range(n_graphs):
        axs[i, 0].set_ylabel(names[i], size=50)
    axs[0, 0].set_title('jaccard', size=50)
    axs[0, 1].set_title('hamming', size=50)
    axs[0, 2].set_title('sizes', size=50)
    # First pass: find per-row axis ranges across all bases.
    jacc_max = [.0] * n_graphs
    hamm_max = [.0] * n_graphs
    size_max = [.0] * n_graphs
    for basis in basis_list:
        configs['basis'] = basis
        config_l = get_config_l(configs)
        summaries = [get_summary(c, burnin, thin, temper) for c in config_l]
        for i, summary in enumerate(summaries):
            jacc_max[i] = max(jacc_max[i],
                              np.max(summary['jaccard_distances' + suffix]) * 100)
            hamm_max[i] = max(hamm_max[i],
                              np.max(summary['hamming_distances' + suffix]))
            size_max[i] = max(size_max[i],
                              np.max(summary['size_distances' + suffix]))
    # Second pass: draw overlaid histograms on the shared ranges. Each sample
    # is weighted 1 / y_ax_scale so counts can be rescaled across figures.
    for basis in basis_list:
        configs['basis'] = basis
        config_l = get_config_l(configs)
        summaries = [get_summary(c, burnin, thin, temper) for c in config_l]
        for i, summary in enumerate(summaries):
            for j, (key, bins) in enumerate([
                    ('jaccard_distances', np.arange(jacc_max[i] + 1) / 100),
                    ('hamming_distances', np.arange(hamm_max[i] + 1)),
                    ('size_distances', np.arange(size_max[i] + 1))]):
                values = summary[key + suffix]
                # Build the weights from the same list being plotted so their
                # lengths always match.
                axs[i, j].hist(values, bins=bins, label=BETTER_NAMES[basis],
                               alpha=.5,
                               weights=np.ones_like(values, dtype=float) / y_ax_scale)
                axs[i, j].legend(fontsize=30)
    fig.savefig(f"distances{tag}_distr_n-{n}_n_obs-{n_obs}.pdf")
    if plot:
        plt.show()
    return fig
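# A usage sketch for the plotting family above. The driver name is
# hypothetical and assumes the same `configs` grid dict as the compare_*
# functions; it simply renders every (uniq, proposed) variant in one pass.
def save_all_distance_figures(configs, burnin=0, thin=1):
    plot_end(configs, burnin=burnin, thin=thin)
    for uniq in (False, True):
        for proposed in (False, True):
            plot_distances(configs, burnin=burnin, thin=thin,
                           uniq=uniq, proposed=proposed)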
["empty"], #, "circle", "random0", "random1", "random2", "random3"], 'prior': ['basis-count'], 'basis': ['hub', 'edge'], 'proposal': ['naive', 'BD'], 'cob_freq': [100], 'iter': [int(1e4)], 'seed': 123 }) def run(conf): n, n_obs = conf['n'], conf['n_obs'] name = conf['true_graph'] data = np.loadtxt(f"data/{name}_{n}_{n_obs}.dat", delimiter=',') sampler = run_config(data, conf) with open( f"data/graph_{conf['true_graph']}_{conf['n']}_{conf['n_obs']}.pkl", 'rb') as handle: g = pickle.load(handle) for burnin in [0, int(.1 * sampler.iter), int(.25 * sampler.iter)]: print(f"saving to {config_to_path(conf)[:-4]}_burnin-{burnin}.short") with open(config_to_path(conf)[:-4] + f"_burnin-{burnin}.short", 'wb') as handle: pickle.dump(sampler.get_summary(g, burnin, thin=100), handle) pool = Pool() pool.map(run, get_config_l(config))