def directed_random_graph(nnodes: int, random_graph_model: Callable, size=1, as_list=False) -> Union[DAG, List[DAG]]:
    """
    Generate random DAG(s) by orienting the edges of a random undirected graph.

    An undirected graph is drawn from `random_graph_model`, a random ordering
    of the nodes is drawn, and every edge is directed from the endpoint that
    comes earlier in the ordering to the one that comes later.

    Parameters
    ----------
    nnodes:
        Number of nodes in each graph.
    random_graph_model:
        Callable taking `nnodes` and returning an object whose `edges`
        attribute iterates over pairs of nodes. The nodes are assumed to be
        the integers `0, ..., nnodes - 1`.
    size:
        Number of graphs.
    as_list:
        If True, return a list even when only one DAG is generated.

    Returns
    -------
    A single DAG when `size == 1` and `as_list` is False, otherwise a list
    of DAGs.
    """
    if size != 1:
        return [directed_random_graph(nnodes, random_graph_model) for _ in range(size)]

    # generate a random undirected graph
    edges = random_graph_model(nnodes).edges

    # generate a random permutation
    random_permutation = np.arange(nnodes)
    np.random.shuffle(random_permutation)

    # Invert the permutation once so that position[v] is the index of node v
    # in the ordering, instead of scanning the permutation for every edge.
    position = np.empty(nnodes, dtype=int)
    position[random_permutation] = np.arange(nnodes)

    arcs = [
        (node1, node2) if position[node1] < position[node2] else (node2, node1)
        for node1, node2 in edges
    ]
    d = DAG(nodes=set(range(nnodes)), arcs=arcs)
    return [d] if as_list else d
def to_dag(self):
    """
    Return a DAG that is consistent with this CPDAG.

    The DAG is built by repeatedly picking a node that has no children and
    for which every undirected neighbor is adjacent to all of the node's
    other neighbors, directing all of its neighbors into it, and removing it
    from a working copy of this graph. If no such node can be found while
    edges remain, the loop stops and only the arcs collected so far are
    used.

    Returns
    -------
    DAG
        A DAG over the same nodes as this graph. Nodes without any incident
        arc are kept as isolated nodes.

    Examples
    --------
    TODO
    """
    from causaldag import DAG

    pdag2 = self.copy()
    arcs = set()

    def is_sink(n):
        return len(pdag2._children[n]) == 0

    def no_vstructs(n):
        # Orienting every edge into n must not create a new v-structure:
        # each undirected neighbor has to be adjacent to all other neighbors.
        return all(
            (pdag2._neighbors[n] - {u_nbr}).issubset(pdag2._neighbors[u_nbr])
            for u_nbr in pdag2._undirected_neighbors[n]
        )

    while len(pdag2._edges) + len(pdag2._arcs) != 0:
        sink = next((n for n in pdag2._nodes if is_sink(n) and no_vstructs(n)), None)
        if sink is None:
            break
        arcs.update((nbr, sink) for nbr in pdag2._neighbors[sink])
        pdag2.remove_node(sink)

    # Pass the node set explicitly; building the DAG from arcs alone would
    # drop every node that has no incident arc.
    return DAG(nodes=set(self._nodes), arcs=arcs)
def directed_erdos(nnodes, density, size=1, as_list=False) -> Union[DAG, List[DAG]]:
    """
    Generate random Erdos-Renyi DAG(s) on `nnodes` nodes with density `density`.

    Every pair `(i, j)` with `i < j` is a candidate arc `i -> j`; a candidate
    is kept when the matching entry of `_coin(density, ...)` is truthy.

    Parameters
    ----------
    nnodes:
        Number of nodes in each graph.
    density:
        Probability of any edge.
    size:
        Number of graphs.
    as_list:
        If True, always return as a list, even if only one DAG is generated.

    Examples
    --------
    >>> d = cd.rand.directed_erdos(5, .5)
    """
    if size != 1:
        return [directed_erdos(nnodes, density) for _ in range(size)]

    num_pairs = int(nnodes * (nnodes - 1) / 2)
    keep = _coin(density, size=num_pairs)
    candidates = itr.combinations(range(nnodes), 2)
    arcs = {pair for pair, flag in zip(candidates, keep) if flag}

    graph = DAG(nodes=set(range(nnodes)), arcs=arcs)
    if as_list:
        return [graph]
    return graph
def directed_erdos(nnodes, density, size=1):
    """
    Generate random Erdos-Renyi DAG(s) on `nnodes` nodes with density `density`.

    Candidate arcs are the pairs `(i, j)` with `i < j`; each one is kept when
    the corresponding entry of `_coin(density, ...)` is truthy.

    Parameters
    ----------
    nnodes:
        Number of nodes in each graph.
    density:
        Probability of any edge.
    size:
        Number of graphs. A single DAG is returned when `size == 1`,
        otherwise a list of `size` DAGs.

    Examples
    --------
    >>> d = cd.rand.directed_erdos(5, .5)
    """
    if size != 1:
        return [directed_erdos(nnodes, density) for _ in range(size)]

    num_pairs = int(nnodes * (nnodes - 1) / 2)
    selected = _coin(density, size=num_pairs)
    # compress keeps exactly the pairs whose selector is truthy
    arcs = set(itr.compress(itr.combinations(range(nnodes), 2), selected))
    return DAG(nodes=set(range(nnodes)), arcs=arcs)
def perm2dag2(perm, ci_tester, node2nbrs=None):
    """
    Build a DAG from a permutation using conditional independence tests.

    For every pair of nodes `pi_i`, `pi_j` with `pi_i` before `pi_j` in
    `perm`, the arc `pi_i -> pi_j` is added unless `ci_tester` reports the
    two nodes as conditionally independent given the conditioning set.

    Parameters
    ----------
    perm:
        Sequence of nodes giving the ordering.
    ci_tester:
        Object with an `is_ci(i, j, cond_set)` method.
    node2nbrs:
        Optional mapping from node to a set of nodes. When given, the
        conditioning set is restricted to the union of the entries for the
        two nodes being tested.

    Returns
    -------
    DAG
        DAG over the nodes of `perm`.
    """
    arcs = set()
    for (_, pi_i), (j, pi_j) in itr.combinations(enumerate(perm), 2):
        # everything that precedes pi_j in the ordering, except pi_i itself
        cond_set = set(perm[:j]) - {pi_i}
        if node2nbrs is not None:
            cond_set = cond_set & (node2nbrs[pi_i] | node2nbrs[pi_j])
        if not ci_tester.is_ci(pi_i, pi_j, cond_set):
            arcs.add((pi_i, pi_j))
    return DAG(nodes=set(perm), arcs=arcs)
def to_gauss_dag(self, perm):
    """
    Return a GaussDAG with the same mean and covariance as this GGM, and is a minimal IMAP of this GGM consistent
    with the node ordering `perm`.

    Parameters
    ----------
    perm:
        The desired permutation, or total order, of the nodes in the result.
        Must contain each of `0, ..., num_nodes - 1` exactly once, since
        nodes are used directly as indices into the covariance matrix.

    Returns
    -------
    GaussDAG
        GaussDAG on nodes `0, ..., num_nodes - 1` whose arc weights, means
        and variances come from regressing each node on its parents.

    Raises
    ------
    ValueError
        If `perm` is not a permutation of `0, ..., num_nodes - 1`.

    Examples
    --------
    TODO
    """
    from causaldag import DAG, GaussDAG

    nodes = list(range(self.num_nodes))
    if sorted(perm) != nodes:
        raise ValueError('perm should be a permutation of 0, ..., num_nodes - 1')

    # === FIND THE STRUCTURE: test each pair in the order given by perm
    d = DAG(nodes=self.nodes)
    for j in range(len(perm)):
        for i in range(j):
            pi_i, pi_j = perm[i], perm[j]
            if not np.isclose(self.partial_correlation(pi_i, pi_j, d.markov_blanket(pi_i)), 0):
                d.add_arc(pi_i, pi_j, unsafe=True)

    arcs = dict()
    Sigma = self.covariance
    # Keyed by node rather than appended in `perm` order, so that the lists
    # handed to GaussDAG line up with its node list.
    node2mean = dict()
    node2variance = dict()
    for i in perm:
        ps = list(d.parents_of(i))

        # === LINEAR REGRESSION TO FIND EDGE WEIGHTS
        S_xx = Sigma[np.ix_(ps, ps)]
        S_xy = Sigma[ps, i]
        coeffs = inv(S_xx) @ S_xy

        # === COMPUTE MEAN AND VARIANCE
        node2mean[i] = self.means[i] - self.means[ps] @ coeffs.T
        node2variance[i] = Sigma[i, i] - Sigma[i, ps] @ coeffs
        for p, coeff in zip(ps, coeffs):
            arcs[(p, i)] = coeff

    means = [node2mean[node] for node in nodes]
    variances = [node2variance[node] for node in nodes]
    return GaussDAG(nodes, arcs, means=means, variances=variances)
def perm2dag(perm, ci_tester: CI_Tester, verbose=False, fixed_adjacencies=set(), fixed_gaps=set(), node2nbrs=None, older=False):
    """
    Build a DAG from a permutation using conditional independence tests.

    Pairs of nodes are visited in the order of `perm`; for each pair the
    earlier node gets an arc to the later node unless the pair is a fixed
    gap or the CI test reports independence.

    Parameters
    ----------
    perm:
        Sequence of nodes giving the ordering.
    ci_tester:
        Object with an `is_ci(i, j, cond_set)` method.
    verbose:
        If True, print the outcome of each CI test.
    fixed_adjacencies:
        Collection of node pairs (tuples, either order) that always get an arc.
    fixed_gaps:
        Collection of node pairs (tuples, either order) that never get an arc.
    node2nbrs:
        Optional mapping from node to a set of nodes. When given, the
        conditioning set is the earlier nodes (minus the tested one)
        restricted to the union of the entries for the two tested nodes;
        otherwise the Markov blanket of the earlier node in the DAG so far.
    older:
        If True, condition on all nodes before the later node in `perm`
        other than the earlier node, overriding the choice above.

    Examples
    --------
    TODO
    """
    d = DAG(nodes=set(perm))
    for j in range(len(perm)):
        for i in range(j):
            pi_i, pi_j = perm[i], perm[j]
            forward, backward = (pi_i, pi_j), (pi_j, pi_i)

            # === IF FIXED, DON'T TEST
            if forward in fixed_adjacencies or backward in fixed_adjacencies:
                d.add_arc(pi_i, pi_j)
                continue
            if forward in fixed_gaps or backward in fixed_gaps:
                continue

            # === TEST MARKOV BLANKET
            if node2nbrs is None:
                mb = d.markov_blanket(pi_i)
            else:
                mb = (set(perm[:j]) - {pi_i}) & (node2nbrs[pi_i] | node2nbrs[pi_j])
            if older:
                mb = set(perm[:j]) - {pi_i}

            is_ci = ci_tester.is_ci(pi_i, pi_j, mb)
            if not is_ci:
                d.add_arc(pi_i, pi_j, unsafe=True)
            if verbose:
                print("%s indep of %s given %s: %s" % (pi_i, pi_j, mb, is_ci))
    return d
def perm2dag_subsets(perm, ci_tester, max_subset_size=None):
    """
    Build a DAG from a permutation by searching over candidate parent sets.

    Not recommended unless max_subset_size set very small. Not thoroughly tested.

    For each node, the subsets of the nodes preceding it in `perm` are tried
    in the order produced by `powerset`. The first subset given which the
    node is conditionally independent of every remaining node becomes its
    parent set.

    Parameters
    ----------
    perm:
        Sequence of nodes giving the ordering.
    ci_tester:
        Object with an `is_ci(i, j, cond_set)` method.
    max_subset_size:
        Passed to `powerset` as `r_max`.

    Returns
    -------
    DAG
        DAG over the nodes of `perm`.
    """
    arcs = set()
    nodes = set(perm)
    for i, pi_i in enumerate(perm):
        for candidate_parent_set in powerset(perm[:i], r_max=max_subset_size):
            # Use the node label pi_i, not its position i, in tests and arcs.
            # NOTE(review): `others` also contains nodes that come after pi_i
            # in `perm` -- confirm that testing against them is intended.
            others = nodes.difference({pi_i}, candidate_parent_set)
            if all(ci_tester.is_ci(pi_i, other, candidate_parent_set) for other in others):
                arcs.update((parent, pi_i) for parent in candidate_parent_set)
                break
    return DAG(nodes=nodes, arcs=arcs)
def directed_erdos(nnodes, density=None, exp_nbrs=None, size=1, as_list=False, random_order=True) -> Union[DAG, List[DAG]]:
    """
    Generate random Erdos-Renyi DAG(s) on `nnodes` nodes with density `density`.

    Parameters
    ----------
    nnodes:
        Number of nodes in each graph.
    density:
        Probability of any edge. At least one of `density` and `exp_nbrs`
        must be given; `density` takes precedence.
    exp_nbrs:
        Used only when `density` is None, in which case the density is set
        to `exp_nbrs / (nnodes - 1)`.
    size:
        Number of graphs.
    as_list:
        If True, always return as a list, even if only one DAG is generated.
    random_order:
        If True, relabel the nodes of the generated DAG with a random
        permutation of `0, ..., nnodes - 1`.

    Examples
    --------
    >>> import causaldag as cd
    >>> d = cd.rand.directed_erdos(5, .5)
    """
    assert density is not None or exp_nbrs is not None
    if density is None:
        density = exp_nbrs / (nnodes - 1)

    if size != 1:
        return [directed_erdos(nnodes, density, random_order=random_order) for _ in range(size)]

    num_pairs = int(nnodes * (nnodes - 1) / 2)
    selected = _coin(density, size=num_pairs)
    # candidate arcs always point from the smaller to the larger label
    arcs = set(itr.compress(itr.combinations(range(nnodes), 2), selected))
    graph = DAG(nodes=set(range(nnodes)), arcs=arcs)

    if random_order:
        labels = list(range(nnodes))
        relabeling = dict(enumerate(np.random.permutation(labels)))
        graph = graph.rename_nodes(relabeling)

    if as_list:
        return [graph]
    return graph
def directed_erdos_with_confounders(
        nnodes: int,
        density: Optional[float] = None,
        exp_nbrs: Optional[float] = None,
        num_confounders: int = 1,
        confounder_pervasiveness: float = 1,
        size=1,
        as_list=False,
        random_order=True
) -> Union[DAG, List[DAG]]:
    """
    Generate random Erdos-Renyi DAG(s) with additional confounder nodes.

    Each graph has `nnodes + num_confounders` nodes. Before any relabeling,
    the confounders are `0, ..., num_confounders - 1` and the remaining
    nodes follow. Candidate arcs are every confounder -> non-confounder pair
    and every ordered pair of non-confounders (lower label to higher label);
    each candidate is kept when the matching `_coin` draw is truthy.

    Parameters
    ----------
    nnodes:
        Number of non-confounder nodes in each graph.
    density:
        Value passed to `_coin` for arcs between non-confounders. At least
        one of `density` and `exp_nbrs` must be given; `density` takes
        precedence.
    exp_nbrs:
        Used only when `density` is None, in which case the density is set
        to `exp_nbrs / (nnodes - 1)`.
    num_confounders:
        Number of confounder nodes.
    confounder_pervasiveness:
        Value passed to `_coin` for confounder -> non-confounder arcs.
    size:
        Number of graphs.
    as_list:
        If True, always return as a list, even if only one DAG is generated.
    random_order:
        If True, relabel all nodes with a random permutation of
        `0, ..., nnodes + num_confounders - 1`.
    """
    assert density is not None or exp_nbrs is not None
    density = density if density is not None else exp_nbrs / (nnodes - 1)

    if size != 1:
        return [
            directed_erdos_with_confounders(
                nnodes,
                density,
                num_confounders=num_confounders,
                confounder_pervasiveness=confounder_pervasiveness,
                random_order=random_order
            )
            for _ in range(size)
        ]

    total_nodes = nnodes + num_confounders
    confounders = list(range(num_confounders))
    nonconfounders = list(range(num_confounders, total_nodes))

    bools = _coin(confounder_pervasiveness, size=int(num_confounders * nnodes))
    confounder_arcs = {
        (i, j) for (i, j), b in zip(itr.product(confounders, nonconfounders), bools) if b
    }
    bools = _coin(density, size=int(nnodes * (nnodes - 1) / 2))
    local_arcs = {
        (i, j) for (i, j), b in zip(itr.combinations(nonconfounders, 2), bools) if b
    }

    # The node set must cover confounders and non-confounders alike; using
    # only range(nnodes) leaves out the highest-labelled nodes.
    d = DAG(nodes=set(range(total_nodes)), arcs=confounder_arcs | local_arcs)

    if random_order:
        nodes = list(range(total_nodes))
        d = d.rename_nodes(dict(enumerate(np.random.permutation(nodes))))

    return [d] if as_list else d
from causaldag import DAG cancer_network = DAG( arcs={('Pollution', 'Cancer'), ('Smoker', 'Cancer'), ('Cancer', 'Xmy'), ('Cancer', 'Dysponoea')}) earthquake_network = DAG( arcs={('Burglary', 'Alarm'), ('Earthquake', 'Alarm'), ('Alarm', 'JohnCalls'), ('Alarm', 'MaryCalls')}) sachs_network = DAG( arcs={ ('PKC', 'PKA'), ('PKC', 'Jnk'), ('PKC', 'P38'), ('PKC', 'Raf'), ('PKC', 'Mek'), ('PKA', 'Jnk'), ('PKA', 'P38'), ('PKA', 'Raf'), ('PKA', 'Mek'), ('PKA', 'Erk'), ('PKA', 'Akt'), ('Raf', 'Mek'), ('Mek', 'Erk'), ('Erk', 'Akt'), ('Plcg', 'PIP3'), ('Plcg', 'PIP2'),
def perm2dag(perm: list, ci_tester: CI_Tester, verbose=False, fixed_adjacencies: Set[UndirectedEdge] = set(), fixed_gaps: Set[UndirectedEdge] = set(), node2nbrs=None, older=False, progress=False):
    """
    Given a permutation, find the minimal IMAP consistent with that permutation and the results of conditional
    independence tests from ci_tester.

    Parameters
    ----------
    perm:
        list of nodes representing the permutation.
    ci_tester:
        object for testing conditional independence.
    verbose:
        if True, log each CI test.
    fixed_adjacencies:
        set of pairs of nodes known to be adjacent, each given as a
        frozenset of the two nodes.
    fixed_gaps:
        set of pairs of nodes known not to be adjacent, each given as a
        frozenset of the two nodes.
    node2nbrs:
        optional mapping from node to a set of nodes. When given, the
        conditioning set is the nodes before the later node in `perm`
        (minus the earlier node) restricted to the union of the entries for
        the two tested nodes; otherwise the Markov blanket of the earlier
        node in the DAG built so far.
    older:
        if True, condition on all nodes before the later node in `perm`
        other than the earlier node, overriding the choice above.
    progress:
        if True, wrap the iteration over node pairs in `tqdm`.

    Raises
    ------
    ValueError
        If any element of `fixed_adjacencies` or `fixed_gaps` is not a
        frozenset.

    Examples
    --------
    >>> from causaldag.utils.ci_tests import MemoizedCI_Tester, gauss_ci_test, gauss_ci_suffstat
    >>> perm = [0,1,2]
    >>> suffstat = gauss_ci_suffstat(samples)
    >>> ci_tester = MemoizedCI_Tester(gauss_ci_test, suffstat)
    >>> perm2dag(perm, ci_tester, fixed_gaps={frozenset({1, 2})})
    """
    # Check every element: lookups below use frozensets, so an entry of any
    # other type would never match and its constraint would be ignored.
    if fixed_adjacencies and not all(isinstance(adj, frozenset) for adj in fixed_adjacencies):
        raise ValueError('fixed_adjacencies should contain frozensets')
    if fixed_gaps and not all(isinstance(gap, frozenset) for gap in fixed_gaps):
        raise ValueError('fixed_gaps should contain frozensets')

    d = DAG(nodes=set(perm))
    # all index pairs (i, j) with i < j, grouped by j
    ixs = list(itr.chain.from_iterable(((f, s) for f in range(s)) for s in range(len(perm))))
    ixs = ixs if not progress else tqdm(ixs)
    for i, j in ixs:
        pi_i, pi_j = perm[i], perm[j]
        pair = frozenset({pi_i, pi_j})

        # === IF FIXED, DON'T TEST
        if pair in fixed_adjacencies:
            d.add_arc(pi_i, pi_j)
            continue
        if pair in fixed_gaps:
            continue

        # === TEST MARKOV BLANKET
        mb = d.markov_blanket(pi_i) if node2nbrs is None else (set(perm[:j]) - {pi_i}) & (node2nbrs[pi_i] | node2nbrs[pi_j])
        mb = mb if not older else set(perm[:j]) - {pi_i}
        is_ci = ci_tester.is_ci(pi_i, pi_j, mb)
        if not is_ci:
            d.add_arc(pi_i, pi_j, unsafe=True)
        if verbose:
            print(f"{pi_i} is independent of {pi_j} given {mb}: {is_ci}")

    return d