class TestUndirectedGraphCreation(unittest.TestCase): def setUp(self): self.graph = UndirectedGraph() def test_class_init_without_data(self): self.assertIsInstance(self.graph, UndirectedGraph) def test_class_init_with_data_string(self): self.G = UndirectedGraph([("a", "b"), ("b", "c")]) self.assertListEqual(sorted(self.G.nodes()), ["a", "b", "c"]) self.assertListEqual(hf.recursive_sorted(self.G.edges()), [["a", "b"], ["b", "c"]]) def test_add_node_string(self): self.graph.add_node("a") self.assertListEqual(self.graph.nodes(), ["a"]) def test_add_node_nonstring(self): self.graph.add_node(1) self.assertListEqual(self.graph.nodes(), [1]) def test_add_nodes_from_string(self): self.graph.add_nodes_from(["a", "b", "c", "d"]) self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c", "d"]) def test_add_nodes_from_non_string(self): self.graph.add_nodes_from([1, 2, 3, 4]) def test_add_edge_string(self): self.graph.add_edge("d", "e") self.assertListEqual(sorted(self.graph.nodes()), ["d", "e"]) self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [["d", "e"]]) self.graph.add_nodes_from(["a", "b", "c"]) self.graph.add_edge("a", "b") self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [["a", "b"], ["d", "e"]]) def test_add_edge_nonstring(self): self.graph.add_edge(1, 2) def test_add_edges_from_string(self): self.graph.add_edges_from([("a", "b"), ("b", "c")]) self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c"]) self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [["a", "b"], ["b", "c"]]) self.graph.add_nodes_from(["d", "e", "f"]) self.graph.add_edges_from([("d", "e"), ("e", "f")]) self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c", "d", "e", "f"]) self.assertListEqual( hf.recursive_sorted(self.graph.edges()), hf.recursive_sorted([("a", "b"), ("b", "c"), ("d", "e"), ("e", "f")]), ) def test_add_edges_from_nonstring(self): self.graph.add_edges_from([(1, 2), (2, 3)]) def test_number_of_neighbors(self): self.graph.add_edges_from([("a", "b"), ("b", "c")]) self.assertEqual(len(self.graph.neighbors("b")), 2) def tearDown(self): del self.graph
class TestUndirectedGraphCreation(unittest.TestCase): def setUp(self): self.graph = UndirectedGraph() def test_class_init_without_data(self): self.assertIsInstance(self.graph, UndirectedGraph) def test_class_init_with_data_string(self): self.G = UndirectedGraph([('a', 'b'), ('b', 'c')]) self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c']) self.assertListEqual(hf.recursive_sorted(self.G.edges()), [['a', 'b'], ['b', 'c']]) def test_add_node_string(self): self.graph.add_node('a') self.assertListEqual(self.graph.nodes(), ['a']) def test_add_node_nonstring(self): self.graph.add_node(1) self.assertListEqual(self.graph.nodes(), [1]) def test_add_nodes_from_string(self): self.graph.add_nodes_from(['a', 'b', 'c', 'd']) self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c', 'd']) def test_add_node_with_weight(self): self.graph.add_node('a') self.graph.add_node('weight_a', weight=0.3) self.assertEqual(self.graph.node['weight_a']['weight'], 0.3) self.assertEqual(self.graph.node['a']['weight'], None) def test_add_nodes_from_with_weight(self): self.graph.add_node(1) self.graph.add_nodes_from(['weight_b', 'weight_c'], weights=[0.3, 0.5]) self.assertEqual(self.graph.node['weight_b']['weight'], 0.3) self.assertEqual(self.graph.node['weight_c']['weight'], 0.5) self.assertEqual(self.graph.node[1]['weight'], None) def test_add_nodes_from_non_string(self): self.graph.add_nodes_from([1, 2, 3, 4]) def test_add_edge_string(self): self.graph.add_edge('d', 'e') self.assertListEqual(sorted(self.graph.nodes()), ['d', 'e']) self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [['d', 'e']]) self.graph.add_nodes_from(['a', 'b', 'c']) self.graph.add_edge('a', 'b') self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [['a', 'b'], ['d', 'e']]) def test_add_edge_nonstring(self): self.graph.add_edge(1, 2) def test_add_edges_from_string(self): self.graph.add_edges_from([('a', 'b'), ('b', 'c')]) self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c']) self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [['a', 'b'], ['b', 'c']]) self.graph.add_nodes_from(['d', 'e', 'f']) self.graph.add_edges_from([('d', 'e'), ('e', 'f')]) self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c', 'd', 'e', 'f']) self.assertListEqual(hf.recursive_sorted(self.graph.edges()), hf.recursive_sorted([('a', 'b'), ('b', 'c'), ('d', 'e'), ('e', 'f')])) def test_add_edges_from_nonstring(self): self.graph.add_edges_from([(1, 2), (2, 3)]) def test_number_of_neighbors(self): self.graph.add_edges_from([('a', 'b'), ('b', 'c')]) self.assertEqual(len(self.graph.neighbors('b')), 2) def tearDown(self): del self.graph
class TestUndirectedGraphCreation(unittest.TestCase): def setUp(self): self.graph = UndirectedGraph() def test_class_init_without_data(self): self.assertIsInstance(self.graph, UndirectedGraph) def test_class_init_with_data_string(self): self.G = UndirectedGraph([('a', 'b'), ('b', 'c')]) self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c']) self.assertListEqual(hf.recursive_sorted(self.G.edges()), [['a', 'b'], ['b', 'c']]) def test_add_node_string(self): self.graph.add_node('a') self.assertListEqual(self.graph.nodes(), ['a']) def test_add_node_nonstring(self): self.graph.add_node(1) self.assertListEqual(self.graph.nodes(), [1]) def test_add_nodes_from_string(self): self.graph.add_nodes_from(['a', 'b', 'c', 'd']) self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c', 'd']) def test_add_nodes_from_non_string(self): self.graph.add_nodes_from([1, 2, 3, 4]) def test_add_edge_string(self): self.graph.add_edge('d', 'e') self.assertListEqual(sorted(self.graph.nodes()), ['d', 'e']) self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [['d', 'e']]) self.graph.add_nodes_from(['a', 'b', 'c']) self.graph.add_edge('a', 'b') self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [['a', 'b'], ['d', 'e']]) def test_add_edge_nonstring(self): self.graph.add_edge(1, 2) def test_add_edges_from_string(self): self.graph.add_edges_from([('a', 'b'), ('b', 'c')]) self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c']) self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [['a', 'b'], ['b', 'c']]) self.graph.add_nodes_from(['d', 'e', 'f']) self.graph.add_edges_from([('d', 'e'), ('e', 'f')]) self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c', 'd', 'e', 'f']) self.assertListEqual(hf.recursive_sorted(self.graph.edges()), hf.recursive_sorted([('a', 'b'), ('b', 'c'), ('d', 'e'), ('e', 'f')])) def test_add_edges_from_nonstring(self): self.graph.add_edges_from([(1, 2), (2, 3)]) def test_number_of_neighbors(self): self.graph.add_edges_from([('a', 'b'), ('b', 'c')]) self.assertEqual(len(self.graph.neighbors('b')), 2) def tearDown(self): del self.graph
def build_skeleton(nodes, independencies): """Estimates a graph skeleton (UndirectedGraph) from a set of independencies using (the first part of) the PC algorithm. The independencies can either be provided as an instance of the `Independencies`-class or by passing a decision function that decides any conditional independency assertion. Returns a tuple `(skeleton, separating_sets)`. If an Independencies-instance is passed, the contained IndependenceAssertions have to admit a faithful BN representation. This is the case if they are obtained as a set of d-seperations of some Bayesian network or if the independence assertions are closed under the semi-graphoid axioms. Otherwise the procedure may fail to identify the correct structure. Parameters ---------- nodes: list, array-like A list of node/variable names of the network skeleton. independencies: Independencies-instance or function. The source of independency information from which to build the skeleton. The provided Independencies should admit a faithful representation. Can either be provided as an Independencies()-instance or by passing a function `f(X, Y, Zs)` that returns `True` when X _|_ Y | Zs, otherwise `False`. (X, Y being individual nodes and Zs a list of nodes). Returns ------- skeleton: UndirectedGraph An estimate for the undirected graph skeleton of the BN underlying the data. separating_sets: dict A dict containing for each pair of not directly connected nodes a separating set ("witnessing set") of variables that makes then conditionally independent. (needed for edge orientation procedures) Reference --------- [1] Neapolitan, Learning Bayesian Networks, Section 10.1.2, Algorithm 10.2 (page 550) http://www.cs.technion.ac.il/~dang/books/Learning%20Bayesian%20Networks(Neapolitan,%20Richard).pdf [2] Koller & Friedman, Probabilistic Graphical Models - Principles and Techniques, 2009 Section 3.4.2.1 (page 85), Algorithm 3.3 Examples -------- >>> from pgmpy.estimators import ConstraintBasedEstimator >>> from pgmpy.models import BayesianModel >>> from pgmpy.independencies import Independencies >>> # build skeleton from list of independencies: ... ind = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D']) >>> # we need to compute closure, otherwise this set of independencies doesn't ... # admit a faithful representation: ... ind = ind.closure() >>> skel, sep_sets = ConstraintBasedEstimator.build_skeleton("ABCD", ind) >>> print(skel.edges()) [('A', 'D'), ('B', 'D'), ('C', 'D')] >>> # build skeleton from d-seperations of BayesianModel: ... model = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')]) >>> skel, sep_sets = ConstraintBasedEstimator.build_skeleton(model.nodes(), model.get_independencies()) >>> print(skel.edges()) [('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')] """ nodes = list(nodes) if isinstance(independencies, Independencies): def is_independent(X, Y, Zs): return IndependenceAssertion(X, Y, Zs) in independencies elif callable(independencies): is_independent = independencies else: raise ValueError( "'independencies' must be either Independencies-instance " + "or a ternary function that decides independencies.") graph = UndirectedGraph(combinations(nodes, 2)) lim_neighbors = 0 separating_sets = dict() while not all( [len(graph.neighbors(node)) < lim_neighbors for node in nodes]): for node in nodes: for neighbor in graph.neighbors(node): # search if there is a set of neighbors (of size lim_neighbors) # that makes X and Y independent: for separating_set in combinations( set(graph.neighbors(node)) - set([neighbor]), lim_neighbors): if is_independent(node, neighbor, separating_set): separating_sets[frozenset( (node, neighbor))] = separating_set graph.remove_edge(node, neighbor) break lim_neighbors += 1 return graph, separating_sets
class TestUndirectedGraphCreation(unittest.TestCase): def setUp(self): self.graph = UndirectedGraph() def test_class_init_without_data(self): self.assertIsInstance(self.graph, UndirectedGraph) def test_class_init_with_data_string(self): self.G = UndirectedGraph([("a", "b"), ("b", "c")]) self.assertListEqual(sorted(self.G.nodes()), ["a", "b", "c"]) self.assertListEqual(hf.recursive_sorted(self.G.edges()), [["a", "b"], ["b", "c"]]) def test_add_node_string(self): self.graph.add_node("a") self.assertListEqual(list(self.graph.nodes()), ["a"]) def test_add_node_nonstring(self): self.graph.add_node(1) self.assertListEqual(list(self.graph.nodes()), [1]) def test_add_nodes_from_string(self): self.graph.add_nodes_from(["a", "b", "c", "d"]) self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c", "d"]) def test_add_node_with_weight(self): self.graph.add_node("a") self.graph.add_node("weight_a", weight=0.3) self.assertEqual(self.graph.nodes["weight_a"]["weight"], 0.3) self.assertEqual(self.graph.nodes["a"]["weight"], None) def test_add_nodes_from_with_weight(self): self.graph.add_node(1) self.graph.add_nodes_from(["weight_b", "weight_c"], weights=[0.3, 0.5]) self.assertEqual(self.graph.nodes["weight_b"]["weight"], 0.3) self.assertEqual(self.graph.nodes["weight_c"]["weight"], 0.5) self.assertEqual(self.graph.nodes[1]["weight"], None) def test_add_nodes_from_non_string(self): self.graph.add_nodes_from([1, 2, 3, 4]) def test_add_edge_string(self): self.graph.add_edge("d", "e") self.assertListEqual(sorted(self.graph.nodes()), ["d", "e"]) self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [["d", "e"]]) self.graph.add_nodes_from(["a", "b", "c"]) self.graph.add_edge("a", "b") self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [["a", "b"], ["d", "e"]]) def test_add_edge_nonstring(self): self.graph.add_edge(1, 2) def test_add_edges_from_string(self): self.graph.add_edges_from([("a", "b"), ("b", "c")]) self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c"]) self.assertListEqual(hf.recursive_sorted(self.graph.edges()), [["a", "b"], ["b", "c"]]) self.graph.add_nodes_from(["d", "e", "f"]) self.graph.add_edges_from([("d", "e"), ("e", "f")]) self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c", "d", "e", "f"]) self.assertListEqual( hf.recursive_sorted(self.graph.edges()), hf.recursive_sorted([("a", "b"), ("b", "c"), ("d", "e"), ("e", "f")]), ) def test_add_edges_from_nonstring(self): self.graph.add_edges_from([(1, 2), (2, 3)]) def test_number_of_neighbors(self): self.graph.add_edges_from([("a", "b"), ("b", "c")]) self.assertEqual(len(list(self.graph.neighbors("b"))), 2) def tearDown(self): del self.graph
def build_skeleton(nodes, independencies): """Estimates a graph skeleton (UndirectedGraph) from a set of independencies using (the first part of) the PC algorithm. The independencies can either be provided as an instance of the `Independencies`-class or by passing a decision function that decides any conditional independency assertion. Returns a tuple `(skeleton, separating_sets)`. If an Independencies-instance is passed, the contained IndependenceAssertions have to admit a faithful BN representation. This is the case if they are obtained as a set of d-seperations of some Bayesian network or if the independence assertions are closed under the semi-graphoid axioms. Otherwise the procedure may fail to identify the correct structure. Parameters ---------- nodes: list, array-like A list of node/variable names of the network skeleton. independencies: Independencies-instance or function. The source of independency information from which to build the skeleton. The provided Independencies should admit a faithful representation. Can either be provided as an Independencies()-instance or by passing a function `f(X, Y, Zs)` that returns `True` when X _|_ Y | Zs, otherwise `False`. (X, Y being individual nodes and Zs a list of nodes). Returns ------- skeleton: UndirectedGraph An estimate for the undirected graph skeleton of the BN underlying the data. separating_sets: dict A dict containing for each pair of not directly connected nodes a separating set ("witnessing set") of variables that makes then conditionally independent. (needed for edge orientation procedures) Reference --------- [1] Neapolitan, Learning Bayesian Networks, Section 10.1.2, Algorithm 10.2 (page 550) http://www.cs.technion.ac.il/~dang/books/Learning%20Bayesian%20Networks(Neapolitan,%20Richard).pdf [2] Koller & Friedman, Probabilistic Graphical Models - Principles and Techniques, 2009 Section 3.4.2.1 (page 85), Algorithm 3.3 Examples -------- >>> from pgmpy.estimators import ConstraintBasedEstimator >>> from pgmpy.models import BayesianModel >>> from pgmpy.independencies import Independencies >>> # build skeleton from list of independencies: ... ind = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D']) >>> # we need to compute closure, otherwise this set of independencies doesn't ... # admit a faithful representation: ... ind = ind.closure() >>> skel, sep_sets = ConstraintBasedEstimator.build_skeleton("ABCD", ind) >>> print(skel.edges()) [('A', 'D'), ('B', 'D'), ('C', 'D')] >>> # build skeleton from d-seperations of BayesianModel: ... model = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')]) >>> skel, sep_sets = ConstraintBasedEstimator.build_skeleton(model.nodes(), model.get_independencies()) >>> print(skel.edges()) [('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')] """ nodes = list(nodes) if isinstance(independencies, Independencies): def is_independent(X, Y, Zs): return IndependenceAssertion(X, Y, Zs) in independencies elif callable(independencies): is_independent = independencies else: raise ValueError("'independencies' must be either Independencies-instance " + "or a ternary function that decides independencies.") graph = UndirectedGraph(combinations(nodes, 2)) lim_neighbors = 0 separating_sets = dict() while not all([len(graph.neighbors(node)) < lim_neighbors for node in nodes]): for node in nodes: for neighbor in graph.neighbors(node): # search if there is a set of neighbors (of size lim_neighbors) # that makes X and Y independent: for separating_set in combinations(set(graph.neighbors(node)) - set([neighbor]), lim_neighbors): if is_independent(node, neighbor, separating_set): separating_sets[frozenset((node, neighbor))] = separating_set graph.remove_edge(node, neighbor) break lim_neighbors += 1 return graph, separating_sets