Exemplo n.º 1
0
def run_pvalue():
    """Compute a p-value for every unordered pair of graphs.

    The function takes no parameters; it reads module-level state:
    ``graphs`` (indexed along its first axis, one graph per index),
    ``args`` (``test``, ``Z``, ``num_iter``, ``pooled_variance``),
    ``Ztrue``, ``Zhat``, ``max_comm``, ``epsilon1`` and ``epsilon2``.

    ``args.test`` selects the procedure applied to each pair:

    * ``1`` -- ``block_permutation_pvalue`` with ``test='gcorr'``
    * ``2`` -- ``dcsbm_pvalue``
    * ``3`` -- ``pearson_exact_pvalue``

    For tests 1 and 2, ``args.Z`` selects how the community assignment is
    obtained:

    * ``1`` -- use ``Ztrue``
    * ``2`` -- test 1: use the stored ``Zhat[(i, j)]``;
      test 2: pass no assignment and search from ``min_comm=1``
    * ``3`` -- test 1: call ``community_estimation`` with both bounds set to
      ``max_comm``; test 2: pass no assignment with ``min_comm=max_comm``

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_graphs, num_graphs)``. Only entries with
        ``i < j`` are written; every other entry keeps its initial 0.

    Raises
    ------
    ValueError
        If ``args.test`` is 1 or 2 and ``args.Z`` is not 1, 2 or 3 (raised
        when the first pair is processed).
    """
    num_graphs = graphs.shape[0]
    pvalues = np.zeros((num_graphs, num_graphs))

    for i in range(num_graphs):
        for j in range(i + 1, num_graphs):
            print('{}, {}'.format(i, j))
            G1 = graphs[i, ...]
            G2 = graphs[j, ...]
            if args.test == 1:
                if args.Z == 1:
                    Z = Ztrue
                elif args.Z == 2:
                    Z = Zhat[(i, j)]
                elif args.Z == 3:
                    Z = community_estimation(G1,
                                             G2,
                                             min_components=max_comm,
                                             max_components=max_comm)
                else:
                    # Without this branch Z would be unbound and the call
                    # below would fail with an opaque UnboundLocalError.
                    raise ValueError(
                        'args.Z must be 1, 2 or 3 when args.test is 1; '
                        'got {!r}'.format(args.Z))
                pvalues[i, j] = block_permutation_pvalue(
                    G1,
                    G2,
                    test='gcorr',
                    num_perm=args.num_iter,
                    Z=Z)
            elif args.test == 2:
                Z = None
                if args.Z == 3:
                    min_comm = max_comm
                elif args.Z == 2:
                    min_comm = 1
                elif args.Z == 1:
                    min_comm = 1  # is NOT used
                    Z = Ztrue
                else:
                    # Same reasoning as above: min_comm would be unbound.
                    raise ValueError(
                        'args.Z must be 1, 2 or 3 when args.test is 2; '
                        'got {!r}'.format(args.Z))
                pvalues[i, j] = dcsbm_pvalue(
                    G1,
                    G2,
                    min_comm=min_comm,
                    max_comm=max_comm,
                    epsilon1=epsilon1,
                    epsilon2=epsilon2,
                    pooled_variance=args.pooled_variance,
                    num_perm=args.num_iter,
                    Z1=Z,
                    Z2=Z)
            elif args.test == 3:
                pvalues[i, j] = pearson_exact_pvalue(G1, G2)
            # NOTE(review): any other args.test value writes nothing, so the
            # returned matrix stays all zeros. Kept for backward
            # compatibility; consider rejecting it where args is parsed.

    return pvalues
Exemplo n.º 2
0
# Two views of the same estimated community assignments:
#   * Zhat      -- 2-D array, one row per (i, j) pair in visiting order,
#                  convenient for iterating over all pairs;
#   * Zhat_dict -- mapping (i, j) -> assignment, convenient for lookup.
Zhat = np.zeros((num_tests, num_vertices))
Zhat_dict = {}

# `count` is the row of Zhat that the current (i, j) pair is written to.
count = 0
for i in range(num_graphs):
    G1 = graphs[i, ...]
    for j in range(i, num_graphs):
        print('{}, {}'.format(i, j))
        if i != j:
            # Distinct graphs: estimate one assignment from the pair.
            G2 = graphs[j, ...]
            assignment = community_estimation(
                G1, G2, min_components=2, max_components=max_comp)
        else:
            # Diagonal entry: estimate from the single graph alone.
            assignment = community_estimation(
                G1, min_components=2, max_components=max_comp)
        Zhat[count, :] = assignment
        count += 1
        Zhat_dict[(i, j)] = assignment

# Persist the array form; the file name encodes the dataset and the
# transformation taken from the command-line arguments.
with open(
        'outputs/{}_Zhat_{}.pkl'.format(args.data, args.transformation),
        'wb') as f:
    pickle.dump(Zhat, f)
with open('outputs/{}_Zhat_dict_{}.pkl'.format(args.data, args.transformation),
            n = num_vertices[i]
            Z = np.repeat([0], n)

        # Draw the graph pair (G1, G2) from the generator that matches the
        # simulation type (`sim`) and whether the setting name contains 'sbm'.
        # NOTE(review): if `sim` is neither 'bern' nor 'gauss', G1 and G2 are
        # not assigned in this fragment -- confirm only those two values occur.
        if sim == 'bern':
            if 'sbm' in setting:
                G1, G2 = sbm_corr_diffmarg(n, p, q, r)
            else:
                G1, G2 = er_corr_diffmarg(n, p, q, r)
        elif sim == 'gauss':
            if 'sbm' in setting:
                G1, G2 = sbm_corr_weighted(n, mu1, mu2, Sigma)
            else:
                G1, G2 = er_corr_weighted(n, mu1, mu2, Sigma)

        # For the 'sbm_estblock' setting, overwrite Z with an assignment
        # estimated from the simulated pair.
        if setting == 'sbm_estblock':
            Z = community_estimation(G1, G2, min_components=5)

        # Two permuted versions of G2; the block version takes Z as input.
        # Presumably these permute vertices globally vs. within the blocks
        # of Z -- confirm against the helper definitions.
        G2_vertex_perm = vertex_permutation(G2)
        G2_block_perm = block_permutation(G2, Z)
        # "null" entries compare G1 with a permuted G2; "alt" entries compare
        # G1 with the unpermuted G2. The alt value stored under a
        # '*_vertex_perm' key comes from the same call as the one stored
        # under the matching '*_block_perm' key.
        test_stats_null['pearson_vertex_perm'][i, rep] = pearson_graph(G1, G2_vertex_perm)
        test_stats_alt['pearson_vertex_perm'][i, rep] = pearson_graph(G1, G2)
        test_stats_null['pearson_block_perm'][i, rep] = pearson_graph(G1, G2_block_perm)
        test_stats_alt['pearson_block_perm'][i, rep] = pearson_graph(G1, G2)
        test_stats_null['gcorr_vertex_perm'][i, rep] = gcorr(G1, G2_vertex_perm, Z)
        test_stats_alt['gcorr_vertex_perm'][i, rep] = gcorr(G1, G2, Z)
        test_stats_null['gcorr_block_perm'][i, rep] = gcorr(G1, G2_block_perm, Z)
        test_stats_alt['gcorr_block_perm'][i, rep] = gcorr(G1, G2, Z)
        # Exact Pearson p-value on the unpermuted pair.
        pearson_exact_pvals[i, rep] = pearson_exact_pvalue(G1, G2)


# compute power
# `result` keeps the list of correlations under 'true' and, for each
# statistic, a (num_corr, num_rep) array: row = correlation index,
# column = replicate index.
result = {
    'true': corr,
    'gcorr_pooled': np.zeros((num_corr, num_rep)),
    'gcorr_unpooled': np.zeros((num_corr, num_rep)),
    'gcorr_dcsbm_pooled': np.zeros((num_corr, num_rep)),
    'gcorr_dcsbm_unpooled': np.zeros((num_corr, num_rep))
}

# For every correlation value r, simulate num_rep graph pairs and record the
# statistics of each pair.
for i, r in enumerate(corr):
    print('iteration {} in {}: r = {}'.format(i + 1, num_corr, r))
    for j in tqdm(range(num_rep)):
        g1, g2 = dcsbm_corr(n, p, r, theta)
        if args.sim == 'estblock':
            # note `community_estimation` uses MASE
            # whereas `graspologic.DCSBM` uses LSE
            # NOTE(review): `maxcomm` is passed positionally here; confirm
            # which parameter of `community_estimation` it binds to.
            zest = community_estimation(g1, g2, maxcomm)
            # Same estimated assignment, with and without pooled variance.
            result['gcorr_pooled'][i, j] = gcorr(g1,
                                                 g2,
                                                 zest,
                                                 pooled_variance=True)
            result['gcorr_unpooled'][i, j] = gcorr(g1,
                                                   g2,
                                                   zest,
                                                   pooled_variance=False)
            # Z=None: no assignment is supplied to gcorr_dcsbm.
            # NOTE(review): `maxcomm` is positional here too -- confirm the
            # parameter it maps to in `gcorr_dcsbm`.
            result['gcorr_dcsbm_pooled'][i,
                                         j] = gcorr_dcsbm(g1,
                                                          g2,
                                                          maxcomm,
                                                          pooled_variance=True,
                                                          Z=None)
            result['gcorr_dcsbm_unpooled'][i, j] = gcorr_dcsbm(
Exemplo n.º 5
0
def run_test_stats():
    """Compute a test statistic for every unordered pair of graphs.

    The function takes no parameters; it reads module-level state:
    ``graphs`` (indexed along its first axis, one graph per index),
    ``args`` (``test``, ``Z``, ``pooled_variance``, ``return_fit``),
    ``Ztrue``, ``Zhat``, ``max_comm``, ``epsilon1``, ``epsilon2`` and
    ``seed``.

    ``args.test`` selects the statistic applied to each pair:

    * ``1`` -- ``gcorr`` with a community assignment
    * ``2`` -- ``gcorr_dcsbm``
    * ``3`` -- ``pearson_graph``

    For tests 1 and 2, ``args.Z`` selects how the community assignment is
    obtained:

    * ``1`` -- use ``Ztrue``
    * ``2`` -- test 1: use the stored ``Zhat[(i, j)]``;
      test 2: pass no assignment and search from ``min_comm=1``
    * ``3`` -- test 1: call ``community_estimation`` with both bounds set to
      ``max_comm``; test 2: pass no assignment with ``min_comm=max_comm``

    Returns
    -------
    numpy.ndarray or tuple
        The ``(num_graphs, num_graphs)`` array of statistics (only entries
        with ``i < j`` are written; the rest stay 0). When
        ``args.return_fit`` is truthy, a tuple ``(test_stats, dcsbm_fit)``
        is returned instead, where ``dcsbm_fit`` maps ``(i, j)`` to the fit
        object returned by ``gcorr_dcsbm``; it is only populated when
        ``args.test == 2``.

    Raises
    ------
    ValueError
        If ``args.test`` is 1 or 2 and ``args.Z`` is not 1, 2 or 3 (raised
        when the first pair is processed).
    """
    num_graphs = graphs.shape[0]
    test_stats = np.zeros((num_graphs, num_graphs))
    # Only handed back to the caller when args.return_fit is set.
    dcsbm_fit = {}

    for i in range(num_graphs):
        for j in range(i + 1, num_graphs):
            print('{}, {}'.format(i, j))
            G1 = graphs[i, ...]
            G2 = graphs[j, ...]
            if args.test == 1:
                if args.Z == 1:
                    Z = Ztrue
                elif args.Z == 2:
                    Z = Zhat[(i, j)]
                elif args.Z == 3:
                    Z = community_estimation(G1,
                                             G2,
                                             min_components=max_comm,
                                             max_components=max_comm)
                else:
                    # Without this branch Z would be unbound and the call
                    # below would fail with an opaque UnboundLocalError.
                    raise ValueError(
                        'args.Z must be 1, 2 or 3 when args.test is 1; '
                        'got {!r}'.format(args.Z))
                test_stats[i, j] = gcorr(G1,
                                         G2,
                                         Z,
                                         pooled_variance=args.pooled_variance)
            elif args.test == 2:
                Z = None
                if args.Z == 3:
                    min_comm = max_comm
                elif args.Z == 2:
                    min_comm = 1
                elif args.Z == 1:
                    min_comm = 1  # is NOT used
                    Z = Ztrue
                else:
                    # Same reasoning as above: min_comm would be unbound.
                    raise ValueError(
                        'args.Z must be 1, 2 or 3 when args.test is 2; '
                        'got {!r}'.format(args.Z))

                # Keyword arguments shared by both call variants below.
                dcsbm_kwargs = dict(min_comm=min_comm,
                                    max_comm=max_comm,
                                    epsilon1=epsilon1,
                                    epsilon2=epsilon2,
                                    pooled_variance=args.pooled_variance,
                                    Z1=Z,
                                    Z2=Z,
                                    seed=seed)
                if args.return_fit:
                    # With return_fit=True the helper returns (stat, fit).
                    ts, fit = gcorr_dcsbm(G1,
                                          G2,
                                          return_fit=True,
                                          **dcsbm_kwargs)
                    test_stats[i, j] = ts
                    dcsbm_fit[(i, j)] = fit
                else:
                    test_stats[i, j] = gcorr_dcsbm(G1, G2, **dcsbm_kwargs)
            elif args.test == 3:
                test_stats[i, j] = pearson_graph(G1, G2)
            # NOTE(review): any other args.test value writes nothing, so the
            # returned matrix stays all zeros. Kept for backward
            # compatibility; consider rejecting it where args is parsed.

    if args.return_fit:
        return test_stats, dcsbm_fit
    return test_stats