Example #1
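All snippets below share these library imports, plus project-level helpers (get_config_l, get_summary, get_conv, get_F1s, get_as_idx, config_to_path, run_config, BETTER_NAMES, PERC_TO_IDX, Graph, and the plot_true_posterior* utilities) assumed importable from the surrounding package:

import pickle
from multiprocessing import Pool

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt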
def compare_convergence(configs,
                        basis_list=['edge', 'hub', 'uniform'],
                        burnin=0,
                        thin=1,
                        k=100,
                        temper=None):
    n, n_obs = configs['n'], configs['n_obs']

    nrows = len(configs['true_graph'])
    ncols = len(basis_list)
    data = np.zeros((nrows, ncols))

    for i in range(len(basis_list)):
        configs['basis'] = basis_list[i]
        config_l = get_config_l(configs)
        data[:, i] = [
            round(get_conv(c, burnin, thin, k=k, temper=temper), 2)
            for c in config_l
        ]

    basis_names = [BETTER_NAMES[s] for s in basis_list]
    columns = pd.MultiIndex.from_product([['convergence'], basis_names])
    indexes = [BETTER_NAMES[s] for s in configs['true_graph']]
    df = pd.DataFrame(data, index=indexes, columns=columns)

    return df
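
A minimal usage sketch, assuming a configs grid of the shape these helpers expect; every value below is illustrative, not taken from the source:

# Hypothetical grid: scalar n/n_obs, one result row per true_graph entry.
configs = {
    'n': 20,
    'n_obs': 100,
    'true_graph': ['empty', 'circle', 'random0'],
    'iter': int(1e4),
    'cob_freq': 100,
}
df = compare_convergence(configs, burnin=1000, thin=10)
print(df)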
Example #2
def compare_as_idx(configs,
                   basis_list=['edge', 'hub', 'uniform'],
                   burnin=0,
                   thin=1,
                   temper=None):
    n, n_obs = configs['n'], configs['n_obs']

    nrows = len(configs['true_graph'])
    ncols = len(basis_list) * 2
    data = np.zeros((nrows, ncols))

    for i in range(len(basis_list)):
        configs['basis'] = basis_list[i]
        config_l = get_config_l(configs)
        idx_pairs = get_as_idx(config_l, burnin, thin, temper)
        data[:, i] = [tup[0] for tup in idx_pairs]
        data[:, len(basis_list) + i] = [tup[1] for tup in idx_pairs]

    basis_names = [BETTER_NAMES[s] for s in basis_list]
    columns = pd.MultiIndex.from_product([['as_start_idx', 'as_end_idx'],
                                          basis_names])
    indexes = [BETTER_NAMES[s] for s in configs['true_graph']]
    df = pd.DataFrame(data, index=indexes, columns=columns)

    return df
Example #3
def compare_acceptance(configs,
                       basis_list=['edge', 'hub', 'uniform'],
                       burnin=0,
                       thin=1,
                       temper=None):
    n, n_obs = configs['n'], configs['n_obs']

    nrows = len(configs['true_graph'])
    ncols = len(basis_list) * 2
    data = np.zeros((nrows, ncols))

    for i in range(len(basis_list)):
        configs['basis'] = basis_list[i]
        config_l = get_config_l(configs)
        summ = [get_summary(c, burnin, thin, temper) for c in config_l]
        data[:, i] = [round(x['accept_rate'], 3) for x in summ]
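        # Tree (change-of-basis) acceptance: accepted tree moves divided by
        # the number of tree proposals, i.e. one every cob_freq iterations.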
        data[:, len(basis_list) + i] = [
            round(
                x['tree_accept_ct'] / (configs['iter'] / configs['cob_freq']),
                3) for x in summ
        ]

    basis_names = [BETTER_NAMES[s] for s in basis_list]
    columns = pd.MultiIndex.from_product([['accept', 'accept_tree'],
                                          basis_names])
    indexes = [BETTER_NAMES[s] for s in configs['true_graph']]
    df = pd.DataFrame(data, index=indexes, columns=columns)

    return df
Example #4
def compare_F1s(configs,
                basis_list=['edge', 'hub', 'uniform'],
                burnin=0,
                thin=1,
                percentile=.5,
                temper=None):
    n, n_obs = configs['n'], configs['n_obs']

    nrows = len(configs['true_graph'])
    ncols = len(basis_list)
    data = np.zeros((nrows, ncols))

    k = PERC_TO_IDX[percentile]
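    # (PERC_TO_IDX is assumed to map a percentile such as .5 for the median
    # to the index of the matching entry in each per-config F1 list.)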
    for i in range(len(basis_list)):
        configs['basis'] = basis_list[i]
        config_l = get_config_l(configs)
        f1_l = [get_F1s(c, burnin, thin, temper) for c in config_l]
        data[:, i] = [round(x[k], 3) for x in f1_l]

    basis_names = [BETTER_NAMES[s] for s in basis_list]
    columns = pd.MultiIndex.from_product([['F1s'], basis_names])
    indexes = [BETTER_NAMES[s] for s in configs['true_graph']]
    df = pd.DataFrame(data, index=indexes, columns=columns)

    return df
Example #5
def compare_n_states(configs,
                     basis_list=['edge', 'hub', 'uniform'],
                     burnin=0,
                     thin=1,
                     percentile=.5,
                     temper=None):
    n, n_obs = configs['n'], configs['n_obs']

    nrows = len(configs['true_graph'])
    ncols = len(basis_list) * 2
    data = np.zeros((nrows, ncols))

    for i in range(len(basis_list)):
        configs['basis'] = basis_list[i]
        config_l = get_config_l(configs)
        summ = [get_summary(c, burnin, thin, temper) for c in config_l]
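        # 'states_visited' counts accepted (visited) states, reported as
        # 'accepted_states'; 'states_considered' counts every proposal,
        # accepted or not, reported as 'proposed_states'.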
        data[:, i] = [x['states_visited'] for x in summ]
        data[:, len(basis_list) + i] = [x['states_considered'] for x in summ]

    basis_names = [BETTER_NAMES[s] for s in basis_list]
    columns = pd.MultiIndex.from_product(
        [['accepted_states', 'proposed_states'], basis_names])
    indexes = [BETTER_NAMES[s] for s in configs['true_graph']]
    df = pd.DataFrame(data, index=indexes, columns=columns)

    return df
Example #6
def compare_median_graphs(configs, threshold=.5, how=None):
    config_l = get_config_l(configs)
    varying_k, varying_v = _get_varying(configs)
    paths = [config_to_path(c) for c in config_l]
    cols = len(paths)

    fig, axs = plt.subplots(1, cols + 1, figsize=(10 * (cols + 1), 10))

    n, n_obs, true_g = config_l[0]['n'], config_l[0]['n_obs'], config_l[0]['true_graph']
    pos = Graph(n).GetCirclePos()

    with open(f"data/graph_{true_g}_{n}_{n_obs}.pkl", 'rb') as handle:
        g = pickle.load(handle)
    if how == 'circle':
        g.Draw(ax=axs[0], pos=pos)
    else:
        g.Draw(ax=axs[0])
    axs[0].set_title('true_graph', fontsize=20)

    for i in range(cols):
        with open(config_to_path(config_l[i]), 'rb') as handle:
            sampler = pickle.load(handle)
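        # Median graph: keep each edge whose inclusion frequency across the
        # posterior samples is at least `threshold`.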
        adjm = str_list_to_median_graph(n, sampler.res['SAMPLES'], threshold=threshold)
        g_ = Graph(n)
        g_.SetFromAdjM(adjm)
        if how == 'circle':
            g_.Draw(ax=axs[i + 1], pos=pos)
        else:
            g_.Draw(ax=axs[i + 1])
        axs[i + 1].set_title(f"{varying_k}: {varying_v[i]}", fontsize=20)

    plt.show()
Example #7
def compare_traces_short(configs, log=False, burnin=0):
    config_l = get_config_l(configs)
    varying_k, varying_v = _get_varying(configs)
    paths = [config_to_path(c) for c in config_l]
    cols = len(paths)

    post_traces = []
    size_traces = []
    basis_traces = []
    init_bases = []
    for c in config_l:
        with open(config_to_path(c)[:-4] + "_burnin-0.short", 'rb') as handle:
            sampler = pickle.load(handle)
        post_traces.append(sampler.posteriors)
        size_traces.append(sampler.sizes)
        basis_traces.append(sampler.bases)
        init_bases.append(sampler.last_params._basis)

    fig, axs = plt.subplots(3, cols, figsize=(10 * cols, 10 * 3))

    for i in range(cols):
        axs[0, i].plot(post_traces[i][burnin:])
        axs[1, i].plot(size_traces[i][burnin:])
        axs[2, i].plot(basis_traces[i][burnin:])

    for i in range(cols):
        axs[0, i].set_title(f"{varying_k}: {varying_v[i]}", fontsize=20)

    ylabs = ["MCMC posterior", "sizes", "n_basis"]
    for i in range(len(ylabs)):
        axs[i, 0].set_ylabel(ylabs[i], rotation=90, fontsize=20)

    plt.show()
Example #8
def compare_traces(configs, log=False, burnin=0):
    config_l = get_config_l(configs)
    varying_k, varying_v = _get_varying(configs)
    paths = [config_to_path(c) for c in config_l]
    cols = len(paths)

    all_visited_states = set()
    for c in config_l:
        with open(config_to_path(c), 'rb') as handle:
            sampler = pickle.load(handle)
        all_visited_states = all_visited_states.union(set(np.unique(sampler.res['SAMPLES'])))

    posts = []
    post_traces = []
    size_traces = []
    basis_traces = []
    init_bases = []
    for c in config_l:
        with open(config_to_path(c), 'rb') as handle:
            sampler = pickle.load(handle)
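        # Build the empirical posterior over the union of states visited by
        # any run, so the per-config posteriors are directly comparable.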
        post = sampler_to_post_dict(sampler, list(all_visited_states))
        if c['basis'] != 'edge':
            post = get_post_dict_cb_only(c['n'], post)
        posts.append(post)
        post_traces.append(np.array(sampler.res['LIK']) + np.array(sampler.res['PRIOR']))
        size_traces.append(list(map(lambda s: np.sum(_str_to_int_list(s)), sampler.res['SAMPLES'])))
        basis_traces.append(_get_basis_ct(sampler))
        init_bases.append(sampler.last_params._basis)


    fig, axs = plt.subplots(3, cols + 1, figsize=(10 * (cols + 1), 10 * 3))

    for i in range(cols):
        plot_true_posterior(posts[i], log, ax=axs[0, 0], label=f"{varying_k}: {varying_v[i]}")
        plot_true_posterior_edge_marginalized(posts[i], log, ax=axs[1, 0], label=f"{varying_k}: {varying_v[i]}")

        if config_l[i]['cob_freq'] is None and config_l[i]['basis'] != 'edge':
            basis = init_bases[i]
            # Reload this config's sampler; the earlier loop variable `c`
            # would otherwise point at the last config only.
            with open(config_to_path(config_l[i]), 'rb') as handle:
                sampler = pickle.load(handle)
            plot_true_posterior_cb_marginalized(posts[i], basis, log, ax=axs[2, 0], sampler=sampler)


        axs[0, i + 1].plot(post_traces[i][burnin:])
        axs[1, i + 1].plot(size_traces[i][burnin:])
        axs[2, i + 1].plot(basis_traces[i][burnin:])

    axs[0, 0].legend()
    axs[1, 0].legend()

    for i in range(cols):
        axs[0, i + 1].set_title(f"{varying_k}: {varying_v[i]}", fontsize=20)

    ylabs = ["MCMC posterior", "sizes", "n_basis"]
    for i in range(len(ylabs)):
        axs[i, 0].set_ylabel(ylabs[i], rotation=90, fontsize=20)

    plt.show()
    return posts
Example #9
def plot_end(configs,
             basis_list=['edge', 'hub', 'uniform'],
             burnin=0,
             thin=1,
             plot=False,
             temper=None):
    n, n_obs = configs['n'], configs['n_obs']
    fig, axs = plt.subplots(len(configs['true_graph']),
                            3,
                            figsize=(3 * 10, len(configs['true_graph']) * 10))
    plt.rc('xtick', labelsize=30)
    plt.rc('ytick', labelsize=30)

    # Setting (shared) x and y labels
    names = [BETTER_NAMES[s] for s in configs['true_graph']]

    for i in range(len(configs['true_graph'])):
        axs[i, 0].set_ylabel(names[i], size=50)

    axs[0, 0].set_title('jaccard', size=50)
    axs[0, 1].set_title('hamming', size=50)
    axs[0, 2].set_title('sizes', size=50)

    # Getting Ranges
    jacc_max = [.0] * len(configs['true_graph'])
    hamm_max = [.0] * len(configs['true_graph'])
    size_max = [.0] * len(configs['true_graph'])
    for basis in basis_list:
        configs['basis'] = basis
        config_l = get_config_l(configs)
        summaries = [get_summary(c, burnin, thin, temper) for c in config_l]

        for i in range(len(summaries)):
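            # Substitute a single zero for any empty distance list so the
            # np.max calls below don't fail; log the offending config's path.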
            if len(summaries[i]['jaccard_distances_end']) == 0:
                print(config_to_path(config_l[i]))
                summaries[i]['jaccard_distances_end'] = [0]
            if len(summaries[i]['hamming_distances_end']) == 0:
                print(config_to_path(config_l[i]))
                summaries[i]['hamming_distances_end'] = [0]
            if len(summaries[i]['size_distances_end']) == 0:
                print(config_to_path(config_l[i]))
                summaries[i]['size_distances_end'] = [0]

            if np.max(
                    summaries[i]['jaccard_distances_end']) * 100 > jacc_max[i]:
                jacc_max[i] = np.max(
                    summaries[i]['jaccard_distances_end']) * 100
            if np.max(summaries[i]['hamming_distances_end']) > hamm_max[i]:
                hamm_max[i] = np.max(summaries[i]['hamming_distances_end'])
            if np.max(summaries[i]['size_distances_end']) > size_max[i]:
                size_max[i] = np.max(summaries[i]['size_distances_end'])

    # Plotting
    for basis in basis_list:
        configs['basis'] = basis
        config_l = get_config_l(configs)
        summaries = [get_summary(c, burnin, thin, temper) for c in config_l]

        for i in range(len(summaries)):
            axs[i, 0].hist(summaries[i]['jaccard_distances_end'],
                           bins=np.arange(jacc_max[i] + 1) / 100,
                           label=BETTER_NAMES[basis],
                           alpha=.5,
                           density=True)
            axs[i, 1].hist(summaries[i]['hamming_distances_end'],
                           bins=np.arange(hamm_max[i] + 1),
                           label=BETTER_NAMES[basis],
                           alpha=.5,
                           density=True)
            axs[i, 2].hist(summaries[i]['size_distances_end'],
                           bins=np.arange(size_max[i] + 1),
                           label=BETTER_NAMES[basis],
                           alpha=.5,
                           density=True)

            axs[i, 0].legend(fontsize=30)
            axs[i, 1].legend(fontsize=30)
            axs[i, 2].legend(fontsize=30)

    fig.savefig(f"as_end_distr_n-{n}_n_obs-{n_obs}.pdf")

    if plot:
        plt.show()

    return fig
Example #10
def plot_distances(configs,
                   basis_list=['edge', 'hub', 'uniform'],
                   burnin=0,
                   thin=1,
                   uniq=False,
                   proposed=False,
                   plot=False,
                   y_ax_scale=1,
                   temper=None):
    n, n_obs = configs['n'], configs['n_obs']
    fig, axs = plt.subplots(len(configs['true_graph']),
                            3,
                            figsize=(3 * 10, len(configs['true_graph']) * 10))
    plt.rc('xtick', labelsize=30)
    plt.rc('ytick', labelsize=30)

    # Setting (shared) x and y labels
    names = [BETTER_NAMES[s] for s in configs['true_graph']]

    for i in range(len(configs['true_graph'])):
        axs[i, 0].set_ylabel(names[i], size=50)

    axs[0, 0].set_title('jaccard', size=50)
    axs[0, 1].set_title('hamming', size=50)
    axs[0, 2].set_title('sizes', size=50)

    # Select which recorded distance variant to plot: raw, unique states
    # ('_uniq'), proposed states ('_'), or unique proposed states ('_uniq_').
    if not uniq and not proposed:
        suffix, tag = '', ''
    elif uniq and not proposed:
        suffix, tag = '_uniq', '_u'
    elif proposed and not uniq:
        suffix, tag = '_', '_p'
    else:
        suffix, tag = '_uniq_', '_u_p'

    # Getting Ranges
    jacc_max = [.0] * len(configs['true_graph'])
    hamm_max = [.0] * len(configs['true_graph'])
    size_max = [.0] * len(configs['true_graph'])
    for basis in basis_list:
        configs['basis'] = basis
        config_l = get_config_l(configs)
        summaries = [get_summary(c, burnin, thin, temper) for c in config_l]

        for i in range(len(summaries)):
            jacc_max[i] = max(
                jacc_max[i],
                np.max(summaries[i]['jaccard_distances' + suffix]) * 100)
            hamm_max[i] = max(
                hamm_max[i],
                np.max(summaries[i]['hamming_distances' + suffix]))
            size_max[i] = max(
                size_max[i],
                np.max(summaries[i]['size_distances' + suffix]))

    # Plotting
    for basis in basis_list:
        configs['basis'] = basis
        config_l = get_config_l(configs)
        summaries = [get_summary(c, burnin, thin, temper) for c in config_l]

        for i in range(len(summaries)):
            # Jaccard maxima are stored scaled by 100, so its bins are
            # rescaled back into hundredths; the other two use unit bins.
            for j, (key, dist_max, scale) in enumerate([
                    ('jaccard_distances', jacc_max[i], 100),
                    ('hamming_distances', hamm_max[i], 1),
                    ('size_distances', size_max[i], 1),
            ]):
                values = summaries[i][key + suffix]
                # Weight by the plotted values themselves so the weights'
                # length always matches the data, whichever variant is chosen.
                axs[i, j].hist(values,
                               bins=np.arange(dist_max + 1) / scale,
                               label=BETTER_NAMES[basis],
                               alpha=.5,
                               weights=np.ones_like(values) / y_ax_scale)
                axs[i, j].legend(fontsize=30)

    fig.savefig(f"distances{tag}_distr_n-{n}_n_obs-{n_obs}.pdf")

    if plot:
        plt.show()

    return fig
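
A usage sketch (same hypothetical configs grid as above); uniq and proposed select which of the four recorded distance variants is plotted:

fig = plot_distances(configs, burnin=1000, thin=10, uniq=True, plot=True)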
    ["empty"],  #, "circle", "random0", "random1", "random2", "random3"],
    'prior': ['basis-count'],
    'basis': ['hub', 'edge'],
    'proposal': ['naive', 'BD'],
    'cob_freq': [100],
    'iter': [int(1e4)],
    'seed': 123
})


def run(conf):
    n, n_obs = conf['n'], conf['n_obs']
    name = conf['true_graph']
    data = np.loadtxt(f"data/{name}_{n}_{n_obs}.dat", delimiter=',')
    sampler = run_config(data, conf)

    with open(
            f"data/graph_{conf['true_graph']}_{conf['n']}_{conf['n_obs']}.pkl",
            'rb') as handle:
        g = pickle.load(handle)

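    # Save a lightweight summary per burn-in level; compare_traces_short
    # above reads these .short files back.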
    for burnin in [0, int(.1 * sampler.iter), int(.25 * sampler.iter)]:
        print(f"saving to {config_to_path(conf)[:-4]}_burnin-{burnin}.short")
        with open(config_to_path(conf)[:-4] + f"_burnin-{burnin}.short",
                  'wb') as handle:
            pickle.dump(sampler.get_summary(g, burnin, thin=100), handle)


if __name__ == '__main__':
    # Guard is required for multiprocessing on platforms that spawn workers;
    # the context manager also closes the pool once all configs have run.
    with Pool() as pool:
        pool.map(run, get_config_l(config))