def random_dag(number_of_nodes: int = 5, edge_density: float = 0.4,
               max_in_degree: int = 4) -> DAG:
    """Create a connected, random directed acyclic graph (DAG).

    Nodes are named ``X0 .. X{n-1}`` and every edge points from a
    lower-numbered node to a higher-numbered one, which keeps the graph
    acyclic by construction.

    Parameters
    ----------
    number_of_nodes: int
        Number of nodes in the graph. A value of zero (or less) yields an
        empty DAG.
    edge_density: float
        Desired fraction of the ``n * (n - 1) / 2`` possible edges. The
        spanning structure built first always contributes ``n - 1`` edges,
        so the result may be denser than requested. If the requested number
        of edges cannot be reached without violating ``max_in_degree``, as
        many edges as the cap allows are added instead.
    max_in_degree: int
        Upper bound on the number of parents of any node.

    Returns
    -------
    dag: DAG
        The generated graph.

    Raises
    ------
    ValueError
        If more than one node is requested with ``max_in_degree < 1``; a
        connected graph then cannot be built.
    """
    node_names = [f"X{i}" for i in range(number_of_nodes)]
    dag = DAG()
    if not node_names:
        return dag
    if number_of_nodes > 1 and max_in_degree < 1:
        raise ValueError(
            "max_in_degree must be at least 1 to connect more than one node"
        )

    # Position of every node in the topological order (O(1) lookups).
    position = {name: index for index, name in enumerate(node_names)}

    # First make sure the dag is connected: attach each unvisited node to a
    # random, already visited node, orienting the edge along the node order.
    visited = []
    unvisited = list(node_names)
    node = random.choice(unvisited)
    unvisited.remove(node)
    visited.append(node)
    dag.add_node(node)
    while unvisited:
        node = random.choice(unvisited)
        neighbor = random.choice(visited)
        if position[node] < position[neighbor]:
            if dag.in_degree(neighbor) >= max_in_degree:
                # neighbor cannot take another parent; sample a new pair.
                continue
            dag.add_edge(node, neighbor)
        else:
            # node is new, so it has no parents yet and max_in_degree >= 1.
            dag.add_edge(neighbor, node)
        unvisited.remove(node)
        visited.append(node)

    # Then add edges until desired density is reached
    maximum_number_of_edges = number_of_nodes * (number_of_nodes - 1) / 2
    target_number_of_edges = int(edge_density * maximum_number_of_edges)
    # The i-th node in the order has only i possible parents and may keep at
    # most max_in_degree of them. Capping the target at this bound guarantees
    # that the loop below terminates.
    reachable_number_of_edges = sum(
        min(index, max_in_degree) for index in range(number_of_nodes)
    )
    target_number_of_edges = min(target_number_of_edges,
                                 reachable_number_of_edges)
    while dag.number_of_edges() < target_number_of_edges:
        add_random_edge(dag, node_names, max_in_degree=max_in_degree)
    return dag
def add_random_edge(dag: DAG, node_order: List[str], max_in_degree: int = 4) -> None:
    """Try to insert one randomly chosen edge into ``dag``.

    Two distinct entries of ``node_order`` are sampled; the one that comes
    first in ``node_order`` becomes the parent and the other the child. The
    edge is only added when the child currently has fewer than
    ``max_in_degree`` parents, so a call may leave the graph unchanged.
    """
    picked = random.sample(node_order, 2)
    rank_first, rank_second = (node_order.index(name) for name in picked)
    if rank_first == rank_second:
        # No strict ordering between the two samples: nothing to add.
        return

    parent, child = picked if rank_first < rank_second else picked[::-1]
    if dag.in_degree(child) < max_in_degree:
        dag.add_edge(parent, child)
class TestDAGCreation(unittest.TestCase):
    """Checks for building ``DAG`` objects and adding nodes/edges to them."""

    def setUp(self):
        self.graph = DAG()

    def tearDown(self):
        del self.graph

    # -- helpers ---------------------------------------------------------

    def _sorted_nodes(self):
        """Return the graph's nodes as a sorted list."""
        return sorted(self.graph.nodes())

    def _sorted_edges(self):
        """Return the graph's edges normalised by ``hf.recursive_sorted``."""
        return hf.recursive_sorted(self.graph.edges())

    def _edge_attributes(self):
        """Return the edge-attribute mapping for the installed networkx."""
        if nx.__version__.startswith("1"):
            return self.graph.edge
        return self.graph.adj

    def _assert_chain_parents(self):
        """Assert the parent lists of the chain a -> b -> c."""
        expected_parents = (("a", []), ("b", ["a"]), ("c", ["b"]))
        for child, parents in expected_parents:
            self.assertListEqual(list(self.graph.predecessors(child)), parents)

    # -- construction ----------------------------------------------------

    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, DAG)

    def test_class_init_with_data_string(self):
        self.graph = DAG([("a", "b"), ("b", "c")])
        self.assertListEqual(self._sorted_nodes(), ["a", "b", "c"])
        self.assertListEqual(self._sorted_edges(), [["a", "b"], ["b", "c"]])

    # -- nodes -----------------------------------------------------------

    def test_add_node_string(self):
        self.graph.add_node("a")
        self.assertListEqual(list(self.graph.nodes()), ["a"])

    def test_add_node_nonstring(self):
        # Only checks that a non-string node is accepted without raising.
        self.graph.add_node(1)

    def test_add_nodes_from_string(self):
        names = ["a", "b", "c", "d"]
        self.graph.add_nodes_from(names)
        self.assertListEqual(self._sorted_nodes(), ["a", "b", "c", "d"])

    def test_add_nodes_from_non_string(self):
        # Only checks that non-string nodes are accepted without raising.
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_node_weight(self):
        self.graph.add_node("weighted_a", 0.3)
        self.assertEqual(self.graph.nodes["weighted_a"]["weight"], 0.3)

    def test_add_nodes_from_weight(self):
        self.graph.add_nodes_from(["weighted_b", "weighted_c"], [0.5, 0.6])
        for name, weight in (("weighted_b", 0.5), ("weighted_c", 0.6)):
            self.assertEqual(self.graph.nodes[name]["weight"], weight)

        # Nodes added without weights get a ``None`` weight.
        self.graph.add_nodes_from(["e", "f"])
        for name in ("e", "f"):
            self.assertEqual(self.graph.nodes[name]["weight"], None)

    # -- edges -----------------------------------------------------------

    def test_add_edge_string(self):
        self.graph.add_edge("d", "e")
        self.assertListEqual(self._sorted_nodes(), ["d", "e"])
        self.assertListEqual(list(self.graph.edges()), [("d", "e")])

        self.graph.add_nodes_from(["a", "b", "c"])
        self.graph.add_edge("a", "b")
        self.assertListEqual(self._sorted_edges(), [["a", "b"], ["d", "e"]])

    def test_add_edge_nonstring(self):
        # Only checks that non-string endpoints are accepted without raising.
        self.graph.add_edge(1, 2)

    def test_add_edges_from_string(self):
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        self.assertListEqual(self._sorted_nodes(), ["a", "b", "c"])
        self.assertListEqual(self._sorted_edges(), [["a", "b"], ["b", "c"]])

        self.graph.add_nodes_from(["d", "e", "f"])
        self.graph.add_edges_from([("d", "e"), ("e", "f")])
        self.assertListEqual(
            self._sorted_nodes(), ["a", "b", "c", "d", "e", "f"]
        )
        all_edges = [("a", "b"), ("b", "c"), ("d", "e"), ("e", "f")]
        self.assertListEqual(
            self._sorted_edges(), hf.recursive_sorted(all_edges)
        )

    def test_add_edges_from_nonstring(self):
        # Only checks that non-string endpoints are accepted without raising.
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_add_edge_weight(self):
        self.graph.add_edge("a", "b", weight=0.3)
        self.assertEqual(self._edge_attributes()["a"]["b"]["weight"], 0.3)

    def test_add_edges_from_weight(self):
        self.graph.add_edges_from([("b", "c"), ("c", "d")], weights=[0.5, 0.6])
        self.assertEqual(self._edge_attributes()["b"]["c"]["weight"], 0.5)
        self.assertEqual(self._edge_attributes()["c"]["d"]["weight"], 0.6)

        # Edges added without weights get a ``None`` weight.
        self.graph.add_edges_from([("e", "f")])
        self.assertEqual(self._edge_attributes()["e"]["f"]["weight"], None)

    # -- structure queries -----------------------------------------------

    def test_update_node_parents_bm_constructor(self):
        self.graph = DAG([("a", "b"), ("b", "c")])
        self._assert_chain_parents()

    def test_update_node_parents(self):
        self.graph.add_nodes_from(["a", "b", "c"])
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        self._assert_chain_parents()

    def test_get_leaves(self):
        tree_edges = [
            ("A", "B"), ("B", "C"), ("B", "D"),
            ("D", "E"), ("D", "F"), ("A", "G"),
        ]
        self.graph.add_edges_from(tree_edges)
        self.assertEqual(
            sorted(self.graph.get_leaves()), sorted(["C", "G", "E", "F"])
        )

    def test_get_roots(self):
        tree_edges = [
            ("A", "B"), ("B", "C"), ("B", "D"),
            ("D", "E"), ("D", "F"), ("A", "G"),
        ]
        self.graph.add_edges_from(tree_edges)
        self.assertEqual(["A"], self.graph.get_roots())

        self.graph.add_edge("H", "G")
        self.assertEqual(sorted(["A", "H"]), sorted(self.graph.get_roots()))

    def test_init_with_cycle(self):
        cyclic_edge_lists = (
            [("a", "a")],
            [("a", "b"), ("b", "a")],
            [("a", "b"), ("b", "c"), ("c", "a")],
        )
        for edge_list in cyclic_edge_lists:
            with self.assertRaises(ValueError):
                DAG(edge_list)
def pdag_to_dag(pdag):
    """Completes a PDAG to a DAG, without adding v-structures, if such a
    completion exists. If no faithful extension is possible, some fully
    oriented DAG that corresponds to the PDAG is returned and a warning is
    generated. This is a static method.

    Parameters
    ----------
    pdag: DAG
        A directed acyclic graph pattern, consisting of (acyclic) directed
        edges as well as "undirected" edges, represented as both-way edges
        between nodes. The argument is not modified; a copy is worked on.

    Returns
    -------
    dag: DAG
        A faithful orientation of pdag, if one exists. Otherwise any fully
        oriented DAG/BayesianModel with the structure of pdag.

    References
    ----------
    [1] Chickering, Learning Equivalence Classes of Bayesian-Network Structures,
        2002; See page 454 (last paragraph) for the algorithm pdag_to_dag
        http://www.jmlr.org/papers/volume2/chickering02a/chickering02a.pdf
    [2] Dor & Tarsi, A simple algorithm to construct a consistent extension
        of a partially oriented graph, 1992,
        http://ftp.cs.ucla.edu/pub/stat_ser/r185-dor-tarsi.pdf

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> from pgmpy.base import DAG
    >>> from pgmpy.estimators import ConstraintBasedEstimator
    >>> data = pd.DataFrame(np.random.randint(0, 4, size=(5000, 3)), columns=list('ABD'))
    >>> data['C'] = data['A'] - data['B']
    >>> data['D'] += data['A']
    >>> c = ConstraintBasedEstimator(data)
    >>> pdag = c.skeleton_to_pdag(*c.estimate_skeleton())
    >>> pdag.edges()
    [('B', 'C'), ('D', 'A'), ('A', 'D'), ('A', 'C')]
    >>> c.pdag_to_dag(pdag).edges()
    [('B', 'C'), ('A', 'D'), ('A', 'C')]

    >>> # pdag_to_dag is static:
    ... pdag1 = DAG([('A', 'B'), ('C', 'B'), ('C', 'D'), ('D', 'C'), ('D', 'A'), ('A', 'D')])
    >>> ConstraintBasedEstimator.pdag_to_dag(pdag1).edges()
    [('D', 'C'), ('C', 'B'), ('A', 'B'), ('A', 'D')]

    >>> # example of a pdag with no faithful extension (an undirected
    ... # 4-cycle without a chord: every orientation creates a v-structure):
    ... pdag2 = DAG([('A', 'B'), ('B', 'A'), ('B', 'C'), ('C', 'B'),
    ...              ('C', 'D'), ('D', 'C'), ('D', 'A'), ('A', 'D')])
    >>> dag2 = ConstraintBasedEstimator.pdag_to_dag(pdag2)
    UserWarning: PDAG has no faithful extension (= no oriented DAG with the same v-structures as PDAG).
    Remaining undirected PDAG edges oriented arbitrarily.
    """
    pdag = pdag.copy()
    dag = DAG()
    dag.add_nodes_from(pdag.nodes())

    # add already directed edges of pdag to dag
    for X, Y in pdag.edges():
        if not pdag.has_edge(Y, X):
            dag.add_edge(X, Y)

    while pdag.number_of_nodes() > 0:
        # find node with (1) no directed outgoing edges and
        #                (2) every undirected neighbor is adjacent to all
        #                    other nodes adjacent to X (Dor & Tarsi)
        found = False
        for X in pdag.nodes():
            successors = set(pdag.successors(X))
            predecessors = set(pdag.predecessors(X))
            directed_outgoing_edges = successors - predecessors
            undirected_neighbors = successors & predecessors
            # Adjacency must be tested in both directions: orienting Y - X
            # as Y -> X only creates a new v-structure Y -> X <- Z when Y
            # and Z share no edge at all, whatever that edge's direction.
            neighbors_are_clique = all(
                pdag.has_edge(Y, Z) or pdag.has_edge(Z, Y)
                for Z in predecessors
                for Y in undirected_neighbors
                if not Y == Z
            )

            if not directed_outgoing_edges and (
                not undirected_neighbors or neighbors_are_clique
            ):
                found = True
                # orient all edges touching X into X (directed parents are
                # re-added, undirected neighbors become parents)
                for Y in pdag.predecessors(X):
                    dag.add_edge(Y, X)
                # safe although we are iterating pdag.nodes(): we break
                # out of the loop right after the removal
                pdag.remove_node(X)
                break

        if not found:
            warn(
                "PDAG has no faithful extension (= no oriented DAG with the "
                + "same v-structures as PDAG). Remaining undirected PDAG edges "
                + "oriented arbitrarily.")
            for X, Y in pdag.edges():
                if not dag.has_edge(Y, X):
                    try:
                        dag.add_edge(X, Y)
                    except ValueError:
                        # Deliberate best effort: skip an orientation that
                        # the DAG rejects; the reverse direction is tried
                        # when the mirrored edge is visited.
                        pass
            break
    return dag