Exemplo n.º 1
0
def posterior(epsilon, bs_dags, true_dag_dict, iv_means, iv_var, K):
    """
    Compute the posterior over the bootstrapped DAGs after intervening.

    Samples K data points per intervention in ``epsilon`` from the true DAG,
    evaluates the likelihood of that data under every candidate DAG in
    ``bs_dags`` (the support of the posterior), and overwrites each
    candidate's weight ``'w'`` with its normalized posterior probability.
    Returns ``bs_dags`` (mutated in place).
    """
    n_dags = len(bs_dags)
    true_gdag = cd.GaussDAG.from_amat(np.asarray(true_dag_dict['A']))

    # One GaussIntervention per target, for every intervention set in epsilon.
    interventions = [{
        target: cd.GaussIntervention(iv_means[target], iv_var)
        for target in targets
    } for targets in epsilon]
    # K interventional samples from the true DAG for each intervention set.
    data = [true_gdag.sample_interventional(iv, K) for iv in interventions]

    log_lik = finite.llhood(data, epsilon, bs_dags, (iv_means, iv_var))

    # Unnormalized log posterior = log prior weight + log likelihood.
    log_prior = np.array([np.log(bs_dags[j]['w']) for j in range(n_dags)])
    log_post = log_prior + np.asarray(log_lik)

    # Normalize in log space for numerical stability, then exponentiate.
    post = np.exp(log_post - logsumexp(log_post))
    for dag, prob in zip(bs_dags, post):
        dag['w'] = prob
    return bs_dags
Exemplo n.º 2
0
    def MI_obj(epsilon, verbose=False, iter=False):
        """
        Monte-Carlo estimate of the mutual-information objective for the
        candidate intervention set ``epsilon`` (a list of target lists).

        For each bootstrapped DAG i we draw M datasets of K samples per
        intervention from that DAG, compute the posterior over all T DAGs
        given each dataset, and average the resulting posterior entropies,
        weighted by the (normalized) prior weights ws. The prior entropy is
        added so the objective is positive. Returns -inf for an empty
        intervention set.

        NOTE: the ``iter`` parameter is unused; it is kept for interface
        compatibility (it also shadows the builtin of the same name).
        """
        if len(epsilon) == 0:
            return -np.inf

        # To speed up run-times, assume you get K samples of each intervention.
        nsamples_iv = K

        # The interventions and the log prior weights do not depend on the
        # Monte-Carlo repetition or the sampling DAG, so hoist them out of
        # both loops (behavior-preserving: no RNG is consumed here).
        ivs = [{
            target: cd.GaussIntervention(iv_means[target], iv_var)
            for target in targets
        } for targets in epsilon]
        log_prior = np.array(
            [np.log(bs_dags[j]['w'] / sum_ws) for j in range(T)])

        obj = 0
        for i in range(T):
            # The sampling DAG depends only on i, not on the MC repetition,
            # so build it once per candidate DAG instead of M times.
            cdag = cd.GaussDAG.from_amat(bs_dags[i]['A'],
                                         variances=bs_dags[i]['b'])
            for _ in range(M):
                # Sample y_mt from each intervention under DAG i and compute
                # p(y) given every possible DAG.
                y_mt = [
                    cdag.sample_interventional(iv, nsamples_iv) for iv in ivs
                ]
                logPy = finite.llhood(y_mt, epsilon, bs_dags,
                                      (iv_means, iv_var))

                # P2 is the categorical posterior over DAGs; normalize in log
                # space for stability. P1 is constant so it is omitted from
                # the entropy computation.
                weighted_logPy = log_prior + np.asarray(logPy)
                P2 = np.exp(weighted_logPy - logsumexp(weighted_logPy))
                if verbose:
                    print(P2)
                H2 = entropy(P2)  # entropy induced by the posterior P2
                obj = obj - H2 * ws[i] / (M * sum_ws)
        return obj + entropy(ws)  # add prior entropy so > 0
Exemplo n.º 3
0
import causaldag as cd
import time
import numpy as np

# Small fork DAG 0 -> 1, 0 -> 2 and a single-node intervention on node 1.
g = cd.GaussDAG([0, 1, 2], arcs={(0, 1), (0, 2)})
cov = g.covariance
nsamples = 2500
trials = 10
iv = {1: cd.GaussIntervention(0, 1)}

# === TIME INVARIANCE TEST NO CONDITIONING SET
# Time `trials` repetitions of sampling plus an HSIC independence test
# between the samples for nodes 0 and 1.
start = time.time()
for _ in range(trials):
    draws = g.sample(nsamples)
    cd.utils.ci_tests.hsic_test_vector(draws[:, 0], draws[:, 1])
print(time.time() - start)

# === TIME INVARIANCE TEST WITH CONDITIONING SET
# start = time.time()
# for _ in range(trials):
#     samples = g.sample(nsamples)
#     cd.utils.ci_tests.hsic_invariance_test(samples[:, 0], samples[:, 1], 0)
# print(time.time() - start)

Exemplo n.º 4
0
from causaldag.inference.structural import igsp
from causaldag.utils.ci_tests import gauss_ci_test, hsic_invariance_test
import causaldag as cd
import numpy as np
import random

# Reproducibility: fix both NumPy's and Python's RNG.
np.random.seed(40)
random.seed(9879132)

nnodes = 10
nsamples = 100
dag = cd.rand.directed_erdos(nnodes, 1.5 / (nnodes - 1), 1)
gdag = cd.rand.rand_weights(dag)
obs_samples = gdag.sample(nsamples)

# Observational data plus one perfect single-node intervention per node.
sample_dict = {frozenset(): obs_samples}
for node in range(10):
    iv = {node: cd.GaussIntervention(1, .1)}
    sample_dict[frozenset({node})] = gdag.sample_interventional_perfect(
        iv, nsamples)

# Sufficient statistic for the Gaussian CI test: correlation matrix + n.
suffstat = dict(C=np.corrcoef(obs_samples, rowvar=False), n=nsamples)

est_dag = igsp(sample_dict,
               suffstat,
               nnodes,
               gauss_ci_test,
               hsic_invariance_test,
               1e-5,
               1e-5,
               nruns=5,
               verbose=True)
Exemplo n.º 5
0
import numpy as np
import random
import causaldag as cd
import itertools as itr
import os
from config import PROJECT_FOLDER

DATA_FOLDER = os.path.join(PROJECT_FOLDER, 'simulations', 'data')

# Catalogue of intervention distributions used by the simulations, keyed by
# a short name. GaussIntervention takes (mean, variance).
INTERVENTIONS = {
    'perfect1': cd.GaussIntervention(1, .01),  # mean 1, near-zero variance
    'perfect2': cd.GaussIntervention(1, .1),   # mean 1, small variance
    'inhibitory1': cd.ScalingIntervention(.1, .2),
    'soft1': cd.ScalingIntervention(.1, .2, mean=1),
    'zero': cd.GaussIntervention(0, 1),        # standard normal around 0
    'shift': cd.ShiftIntervention(2)  # presumably shifts node values by 2 — confirm
}


def get_dag_folder(ndags, nnodes, nneighbors, dag_num, nonlinear=False):
    """
    Return the data folder for DAG ``dag_num`` within the simulation setting
    identified by (ndags, nnodes, nneighbors, nonlinear).
    """
    suffix = '_nonlinear' if nonlinear else ''
    setting = f'nnodes={nnodes}_nneighbors={nneighbors}_ndags={ndags}{suffix}'
    return os.path.join(DATA_FOLDER, setting, f'dag{dag_num}')


def get_sample_folder(ndags,
                      nnodes,
                      nneighbors,
Exemplo n.º 6
0
import causaldag as cd
import matplotlib.pyplot as plt
import os
import random
import numpy as np
from numpy.ma import masked_array

# Reproducibility: fix both Python's and NumPy's RNG.
random.seed(181)
np.random.seed(181)

nsamples = 10

# Fork DAG 0 -> 1 and 0 -> 2 with unit edge weights; children have low
# noise (.2) relative to the root (1).
g = cd.GaussDAG(nodes=[0, 1, 2], arcs={(0, 1): 1, (0, 2): 1}, variances=[1, .2, .2])
obs_samples = g.sample(nsamples)

# Interventional datasets: node 1 alone, then nodes 0 and 1 together.
iv1_samples = g.sample_interventional({1: cd.GaussIntervention(0, 1)}, nsamples=nsamples)
iv01_samples = g.sample_interventional({1: cd.GaussIntervention(0, 1), 0: cd.GaussIntervention(1, .1)}, nsamples=nsamples)

# Render each dataset as a heatmap (rows = samples, columns = nodes), with
# no axis ticks, saved as transparent PNGs under figures/example_data/.
cmap = 'bwr'
plt.clf()
os.makedirs('figures/example_data/', exist_ok=True)
plt.imshow(obs_samples, cmap=cmap)
plt.xticks([])
plt.yticks([])
plt.tight_layout()
plt.savefig('figures/example_data/obs.png', transparent=True, bbox_inches='tight')

plt.clf()
plt.imshow(iv1_samples, cmap=cmap)
plt.xticks([])
plt.yticks([])
Exemplo n.º 7
0
def simulate(strategy, simulator_config, gdag, strategy_folder, num_bootstrap_dags_final=100, save_gies=True):
    """
    Run one active-learning simulation of `strategy` against the ground-truth
    DAG `gdag`, saving all drawn samples (and optionally GIES bootstrap DAGs)
    under `strategy_folder`.

    Parameters
    ----------
    strategy:
        Callable taking an IterationData and returning a dict mapping
        intervention index -> number of samples to allocate to it.
    simulator_config:
        Configuration object; fields read here: starting_samples, n_samples,
        n_batches, max_interventions, intervention_type,
        intervention_strength, target, target_allowed, plus a save() method.
    gdag:
        Ground-truth Gaussian DAG used to generate all data.
    strategy_folder:
        Output directory. If it already contains a 'samples' subfolder the
        simulation is assumed complete and nothing is run.
    num_bootstrap_dags_final:
        Number of bootstrap DAGs for the GIES runs.
    save_gies:
        If True, fit and save GIES bootstrap DAGs both before and after the
        interventional batches.

    Raises
    ------
    ValueError if the strategy returns the wrong number of samples, too many
    interventions, an unknown intervention_type is configured, or the final
    sample count does not add up.
    """
    # Skip entirely if this strategy has already been simulated.
    if os.path.exists(os.path.join(strategy_folder, 'samples')):
        return

    # === SAVE SIMULATION META-INFORMATION
    os.makedirs(strategy_folder, exist_ok=True)
    simulator_config.save(strategy_folder)

    # === SAMPLE SOME OBSERVATIONAL DATA TO START WITH
    # all_samples maps intervened-node index -> sample matrix; key -1 holds
    # the observational data.
    n_nodes = len(gdag.nodes)
    all_samples = {i: np.zeros([0, n_nodes]) for i in range(n_nodes)}
    all_samples[-1] = gdag.sample(simulator_config.starting_samples)
    # Inverse of the empirical second-moment matrix of the observational data.
    precision_matrix = np.linalg.inv(all_samples[-1].T @ all_samples[-1] / len(all_samples[-1]))

    # === GET GIES SAMPLES GIVEN JUST OBSERVATIONAL DATA
    if save_gies:
        initial_samples_path = os.path.join(strategy_folder, 'initial_samples.csv')
        initial_interventions_path = os.path.join(strategy_folder, 'initial_interventions')
        initial_gies_dags_path = os.path.join(strategy_folder, 'initial_dags/')
        graph_utils._write_data(all_samples, initial_samples_path, initial_interventions_path)
        graph_utils.run_gies_boot(num_bootstrap_dags_final, initial_samples_path, initial_interventions_path, initial_gies_dags_path)
        amats, dags = graph_utils._load_dags(initial_gies_dags_path, delete=True)
        # Persist each bootstrapped adjacency matrix individually.
        for d, amat in enumerate(amats):
            np.save(os.path.join(initial_gies_dags_path, 'dag%d.npy' % d), amat)

    # === SPECIFY INTERVENTIONAL DISTRIBUTIONS BASED ON EACH NODE'S STANDARD DEVIATION
    intervention_set = list(range(n_nodes))
    if simulator_config.intervention_type == 'node-variance':
        # +/- intervention_strength scaled by each node's marginal std dev.
        interventions = [
            cd.BinaryIntervention(
                intervention1=cd.ConstantIntervention(val=-simulator_config.intervention_strength * std),
                intervention2=cd.ConstantIntervention(val=simulator_config.intervention_strength * std)
            ) for std in np.diag(gdag.covariance) ** .5
        ]
    elif simulator_config.intervention_type == 'constant-all':
        # Same +/- intervention_strength constants for every node.
        interventions = [
            cd.BinaryIntervention(
                intervention1=cd.ConstantIntervention(val=-simulator_config.intervention_strength),
                intervention2=cd.ConstantIntervention(val=simulator_config.intervention_strength)
            ) for _ in intervention_set
        ]
    elif simulator_config.intervention_type == 'gauss':
        interventions = [
            cd.GaussIntervention(mean=0, variance=simulator_config.intervention_strength) for _ in intervention_set
        ]
    elif simulator_config.intervention_type == 'constant':
        interventions = [
            cd.ConstantIntervention(val=0) for _ in intervention_set
        ]
    else:
        raise ValueError

    if not simulator_config.target_allowed:
        # Deleting the same index from both lists keeps them aligned.
        del intervention_set[simulator_config.target]
        del interventions[simulator_config.target]
    print(intervention_set)

    # === RUN STRATEGY ON EACH BATCH
    for batch in range(simulator_config.n_batches):
        print('Batch %d with %s' % (batch, simulator_config))
        batch_folder = os.path.join(strategy_folder, 'dags_batch=%d/' % batch)
        os.makedirs(batch_folder, exist_ok=True)
        iteration_data = IterationData(
            current_data=all_samples,
            max_interventions=simulator_config.max_interventions,
            n_samples=simulator_config.n_samples,
            batch_num=batch,
            n_batches=simulator_config.n_batches,
            intervention_set=intervention_set,
            interventions=interventions,
            batch_folder=batch_folder,
            precision_matrix=precision_matrix
        )
        recommended_interventions = strategy(iteration_data)
        # Each batch must allocate exactly its share of the sample budget.
        if not sum(recommended_interventions.values()) == iteration_data.n_samples / iteration_data.n_batches:
            raise ValueError('Did not return correct amount of samples')
        rec_interventions_nonzero = {intv_ix for intv_ix, ns in recommended_interventions.items() if ns != 0}
        if simulator_config.max_interventions is not None and len(rec_interventions_nonzero) > simulator_config.max_interventions:
            raise ValueError('Returned too many interventions')

        # Draw the requested interventional samples and accumulate them.
        for intv_ix, nsamples in recommended_interventions.items():
            iv_node = intervention_set[intv_ix]
            new_samples = gdag.sample_interventional({iv_node: interventions[intv_ix]}, nsamples)
            all_samples[iv_node] = np.vstack((all_samples[iv_node], new_samples))

    samples_folder = os.path.join(strategy_folder, 'samples')
    os.makedirs(samples_folder, exist_ok=True)
    for i, samples in all_samples.items():
        np.savetxt(os.path.join(samples_folder, 'intervention=%d.csv' % i), samples)

    # === CHECK THE TOTAL NUMBER OF SAMPLES IS CORRECT
    nsamples_final = sum(all_samples[iv_node].shape[0] for iv_node in intervention_set + [-1])
    if nsamples_final != simulator_config.starting_samples + simulator_config.n_samples:
        raise ValueError('Did not use all samples')

    # === GET GIES SAMPLES GIVEN THE DATA FOR THIS SIMULATION
    if save_gies:
        final_samples_path = os.path.join(strategy_folder, 'final_samples.csv')
        final_interventions_path = os.path.join(strategy_folder, 'final_interventions')
        final_gies_dags_path = os.path.join(strategy_folder, 'final_dags/')
        graph_utils._write_data(all_samples, final_samples_path, final_interventions_path)
        graph_utils.run_gies_boot(num_bootstrap_dags_final, final_samples_path, final_interventions_path, final_gies_dags_path)
        amats, dags = graph_utils._load_dags(final_gies_dags_path, delete=True)
        for d, amat in enumerate(amats):
            np.save(os.path.join(final_gies_dags_path, 'dag%d.npy' % d), amat)
Exemplo n.º 8
0
            # = find entropy
            mask = probs != 0
            plogps = np.zeros(len(probs))
            plogps[mask] = np.log2(probs[mask]) * probs[mask]
            return -plogps.sum()

        return get_k_entropy

    np.random.seed(100)
    # Random 10-node DAG, rebuilt as a Gaussian DAG with the same arcs.
    g = cd.rand.directed_erdos(10, .5)
    g = cd.GaussDAG(nodes=list(range(10)), arcs=g.arcs)

    # Enumerate the full Markov equivalence class of g via its CPDAG.
    mec = [
        cd.DAG(arcs=arcs) for arcs in cd.DAG(arcs=g.arcs).cpdag().all_dags()
    ]
    strat = create_info_gain_strategy_dag_collection(
        mec, [get_mec_functional_k(mec)], [get_k_entropy_fxn(len(mec))],
        verbose=True)
    samples = g.sample(1000)
    # NOTE(review): this is the empirical second-moment matrix samples^T
    # samples / n, NOT its inverse, despite the name — confirm intent.
    precision_matrix = samples.T @ samples / 1000
    # Single-batch run: 500 samples, at most one intervention among nodes
    # 0, 1, 2, each with a default GaussIntervention.
    sel_interventions = strat(
        IterationData(current_data={-1: g.sample(1000)},
                      max_interventions=1,
                      n_samples=500,
                      batch_num=0,
                      n_batches=1,
                      intervention_set=[0, 1, 2],
                      interventions=[cd.GaussIntervention() for _ in range(3)],
                      batch_folder='test_sanity',
                      precision_matrix=precision_matrix))
Exemplo n.º 9
0
if __name__ == '__main__':
    import numpy as np
    import causaldag as cd
    from utils.graph_utils import cross_entropy_interventional, get_covariance_interventional, get_precision_interventional
    from scipy import stats

    # Two 3-node Gaussian DAGs that differ only in the 0 -> 1 edge weight.
    amat1 = np.array([[0, 2, 3], [0, 0, 5], [0, 0, 0]])
    g1 = cd.GaussDAG.from_amat(amat1, variances=[2, 2, 2])

    amat2 = np.array([[0, 3, 3], [0, 0, 5], [0, 0, 0]])
    g2 = cd.GaussDAG.from_amat(amat2)

    # Closed-form interventional cross entropy vs. a Monte-Carlo estimate
    # from one million interventional samples.
    iv_variance = .1
    actual = cross_entropy_interventional(g1, g2, 0, iv_variance)
    g1_samples = g1.sample_interventional(
        {0: cd.GaussIntervention(mean=0, variance=iv_variance)}, 1000000)
    g2_logpdfs = g2.logpdf(
        g1_samples, {0: cd.GaussIntervention(mean=0, variance=iv_variance)})
    print('approx', g2_logpdfs.mean())
    print('actual', actual)

    cov1 = get_covariance_interventional(g1, 0, iv_variance)
    cov2 = get_covariance_interventional(g2, 0, iv_variance)

    # Manual cross-entropy for zero-mean Gaussians:
    # H(N1, N2) = .5 * (log det(2*pi*e*S1) - p + tr(S2^-1 S1)
    #                   + log det S2 - log det S1).
    # BUG FIX: the original nested the parentheses as
    # np.log(np.linalg.det(cov2) - np.log(np.linalg.det(cov1))), i.e. the
    # log of a difference instead of the difference of logs.
    p = 3
    manual_ce = .5 * (-p + np.trace(np.linalg.inv(cov2).dot(cov1)) +
                      np.log(np.linalg.det(cov2)) -
                      np.log(np.linalg.det(cov1)) +
                      np.log(np.linalg.det(2 * np.pi * np.e * cov1)))

    samples = stats.multivariate_normal(cov=cov1).rvs(1000000)
    logpdfs = stats.multivariate_normal(cov=cov2).logpdf(samples)
Exemplo n.º 10
0
        [0, 2, 3],
        [0, 0, 5],
        [0, 0, 0]
    ])
    g1 = cd.GaussDAG.from_amat(amat1, variances=[2, 2, 2])

    amat2 = np.array([
        [0, 3, 3],
        [0, 0, 5],
        [0, 0, 0]
    ])
    g2 = cd.GaussDAG.from_amat(amat2)

    # Closed-form interventional cross entropy vs. Monte-Carlo estimates.
    iv_variance = .1
    actual = cross_entropy_interventional(g1, g2, 0, iv_variance)
    g1_samples = g1.sample_interventional({0: cd.GaussIntervention(mean=0, variance=iv_variance)}, 1000000)
    g2_logpdfs = g2.logpdf(g1_samples, {0: cd.GaussIntervention(mean=0, variance=iv_variance)})
    print('approx', g2_logpdfs.mean())
    print('actual', actual)

    cov1 = get_covariance_interventional(g1, 0, iv_variance)
    cov2 = get_covariance_interventional(g2, 0, iv_variance)

    # BUG FIX: the original computed
    # np.log(np.linalg.det(cov2) - np.log(np.linalg.det(cov1))) — the log of
    # a difference instead of the difference of logs required by the Gaussian
    # cross-entropy formula.
    p = 3
    manual_ce = .5 * (-p + np.trace(np.linalg.inv(cov2).dot(cov1))
                      + np.log(np.linalg.det(cov2))
                      - np.log(np.linalg.det(cov1))
                      + np.log(np.linalg.det(2 * np.pi * np.e * cov1)))

    # Cross-check with scipy's multivariate normal logpdf on samples drawn
    # both from scipy and from the causaldag sampler.
    samples = stats.multivariate_normal(cov=cov1).rvs(1000000)
    logpdfs = stats.multivariate_normal(cov=cov2).logpdf(samples)
    cd_samples = g1.sample_interventional({0: cd.GaussIntervention(mean=0, variance=iv_variance)}, 1000000)
    logpdfs_cd_samples = stats.multivariate_normal(cov=cov2).logpdf(cd_samples)
    print('scipy approx', logpdfs.mean())
Exemplo n.º 11
0
from causaldag.utils.ci_tests import gauss_ci_test
import numpy as np
import random

# Reproducibility: fix both NumPy's and Python's RNG.
np.random.seed(1729)
random.seed(1729)

nnodes = 15
g = cd.rand.rand_weights(cd.rand.directed_erdos(nnodes, 3 / (nnodes - 1), 1))
# BUG FIX: the original read `iv_node = random`, which assigns the `random`
# MODULE itself as the intervened node. Pick an actual node index instead.
iv_node = random.randint(0, nnodes - 1)
nsamples = 100
# Observational data plus one perfect intervention on iv_node.
samples = {
    frozenset():
    g.sample(nsamples),
    frozenset({iv_node}):
    g.sample_interventional_perfect({iv_node: cd.GaussIntervention(1, .1)},
                                    nsamples)
}
corr = np.corrcoef(samples[frozenset()], rowvar=False)
suffstat = dict(C=corr, n=nsamples)
profiler = LineProfiler()


def run_gsp():
    """Run GSP 20 times on the observational sufficient statistic."""
    for i in range(20):
        gsp(suffstat, nnodes, gauss_ci_test, nruns=10)


# Line-profile gsp itself across the repeated calls.
profiler.add_function(gsp)
profiler.runcall(run_gsp)
profiler.print_stats()
Exemplo n.º 12
0
	for adj_mat in adj_mats:
		avg_inc_mat += graph_utils.adj2inc(adj_mat)
	true_inc_mat = graph_utils.adj2inc(true_adj_mat)
	roc_curve(true_inc_mat[:, target], avg_inc_mat[:, target])

if __name__ == '__main__':
	# Load the ground-truth adjacency matrix and rebuild the weighted DAG.
	adj_true = np.loadtxt('./data/dataset_5000/graph_2/adjacency.csv')
	g = cd.from_amat(adj_true)
	dict_weights = {}
	for arc in g.arcs:
		dict_weights[arc] = adj_true[arc[0], arc[1]]
	gdag = cd.GaussDAG(nodes=list(range(50)), arcs=dict_weights)
	# 250 observational samples (intervention label -1), then 25 samples for
	# each of 10 randomly chosen intervened nodes.
	all_data = [gdag.sample(250)]
	interventions = [-1] * 250
	all_iv = np.random.randint(0, 50, 10)
	for iv in all_iv:
		interventions += [iv] * 25
		g_iv = cd.GaussIntervention(mean=2, variance=1)
		all_data.append(gdag.sample_interventional({iv: g_iv}, 25))

	all_data = np.vstack(all_data)
	interventions = np.array(interventions)
	# Shift non-observational labels up by one before saving, then restore —
	# presumably because the GIES tooling expects 1-based labels; confirm.
	interventions[interventions != -1] = interventions[interventions != -1] + 1
	np.savetxt('./random_data', all_data)
	np.savetxt('./random_interventions', interventions)
	interventions[interventions != -1] = interventions[interventions != -1] - 1
	# 200 bootstrapped GIES runs over the saved data; persist the results.
	graph_utils.run_gies_boot(200, './random_data', './random_interventions')
	adj_mats = graph_utils.load_adj_mats()
	np.save('./data/dataset_5000/graph_2/adj_mats_random', adj_mats)

Exemplo n.º 13
0
        else:
            context = int(j[1:])
            node = i
        return gauss_invariance_test(suffstat['invariance'],
                                     context,
                                     node,
                                     cond_set=cond_set,
                                     alpha=alpha_inv)


nnodes = 5
nodes = set(range(nnodes))
nneighbors = 1.5
nsettings = 5
num_unknown_targets = 0
# Intervention used at every known target: tight Gaussian around 1.
INTERVENTION = cd.GaussIntervention(1, .01)
d = cd.rand.directed_erdos(nnodes, nneighbors / (nnodes - 1))
g = cd.rand.rand_weights(d)
# One known intervened node per setting, plus num_unknown_targets additional
# (unknown) targets drawn from the remaining nodes.
known_iv_list = random.sample(list(nodes), nsettings)
unknown_ivs_list = [
    random.sample(list(nodes - {known_iv}), num_unknown_targets)
    for known_iv in known_iv_list
]
# Full target set (known + unknown) for each setting.
all_ivs_list = [{
    known_iv, *unknown_ivs
} for known_iv, unknown_ivs in zip(known_iv_list, unknown_ivs_list)]

nsamples = 5000
obs_samples = g.sample(nsamples)
iv_samples_list = [
    g.sample_interventional({iv: INTERVENTION
Exemplo n.º 14
0
import numpy as np
import random

# Reproducibility: fix both NumPy's and Python's RNG.
np.random.seed(40)
random.seed(9879132)

nnodes = 10
nsamples = 100
dag = cd.rand.directed_erdos(nnodes, 1.5 / (nnodes - 1), 1)
gdag = cd.rand.rand_weights(dag)
obs_samples = gdag.sample(nsamples)
setting_list = []
# Each setting intervenes on node i AND node 0, but only {i} is recorded as
# known — node 0 plays the role of an unknown (off-target) intervention.
for i in range(10):
    iv_samples = gdag.sample_interventional_perfect(
        {
            i: cd.GaussIntervention(1, .1),
            0: cd.GaussIntervention(1, .1)
        }, nsamples)
    setting_list.append({'known_interventions': {i}, 'samples': iv_samples})
# Sufficient statistic for the Gaussian CI test: correlation matrix + n.
suffstat = dict(C=np.corrcoef(obs_samples, rowvar=False), n=nsamples)

est_dag, learned_intervention_targets = unknown_target_igsp(
    obs_samples,
    setting_list,
    suffstat,
    nnodes,
    gauss_ci_test,
    hsic_invariance_test,
    1e-5,
    1e-5,
    nruns=5,
Exemplo n.º 15
0
from R_algs.wrappers import run_icp
import causaldag as cd
import os
import numpy as np
from config import PROJECT_FOLDER

# Chain DAG 0 -> 1 -> 2; draw observational and perfect-interventional data.
nsamples = 10
g = cd.GaussDAG([0, 1, 2], arcs={(0, 1), (1, 2)})
obs_samples = g.sample(nsamples)
iv_node = 1
intervention = {iv_node: cd.GaussIntervention(10, .01)}
iv_samples = g.sample_interventional_perfect(intervention, nsamples)

# === SAVE DATA
sample_folder = os.path.join(PROJECT_FOLDER, 'tmp_icp_test')
iv_sample_folder = os.path.join(sample_folder, 'interventional')
os.makedirs(iv_sample_folder, exist_ok=True)
obs_path = os.path.join(sample_folder, 'observational.txt')
np.savetxt(obs_path, obs_samples)
iv_path = os.path.join(
    iv_sample_folder, 'known_ivs=%s;unknown_ivs=.txt' % iv_node)
np.savetxt(iv_path, iv_samples)

# === RUN ICP
run_icp(sample_folder, .01)