def moralize(self):
    """
    Removes all the immoralities in the Network and creates a moral
    graph (UndirectedGraph).

    A v-structure X->Z<-Y is an immorality if there is no directed edge
    between X and Y.

    Every node returned by the parent class' ``nodes()`` is carried over
    into the result, including nodes that have no incident edge.

    Returns
    -------
    UndirectedGraph
        The undirected edges of this network plus one edge between every
        pair of parents that share a child.

    Examples
    --------
    >>> from pgmpy.models import DynamicBayesianNetwork as DBN
    >>> dbn = DBN([(('D',0), ('G',0)), (('I',0), ('G',0))])
    >>> moral_graph = dbn.moralize()
    >>> moral_graph.edges()
    [(('G', 0), ('I', 0)),
     (('G', 0), ('D', 0)),
     (('D', 1), ('I', 1)),
     (('D', 1), ('G', 1)),
     (('I', 0), ('D', 0)),
     (('G', 1), ('I', 1))]
    """
    # Seed the graph with the nodes first: constructing it from the edge
    # list alone would lose every node that has no incident edge.
    moral_graph = UndirectedGraph()
    moral_graph.add_nodes_from(super().nodes())
    moral_graph.add_edges_from(self.to_undirected().edges())

    # "Marry" the parents: connect every pair of parents of a common child.
    for node in super().nodes():
        moral_graph.add_edges_from(
            itertools.combinations(self.get_parents(node), 2))

    return moral_graph
class TestUndirectedGraphCreation(unittest.TestCase):
    """Construction and node/edge insertion tests for ``UndirectedGraph``.

    ``nodes()`` and ``neighbors()`` results are wrapped in ``list(...)``
    before list-only assertions / ``len`` are applied, so the tests do not
    depend on those calls returning a concrete list.
    NOTE(review): presumably UndirectedGraph is networkx-backed, where
    2.x returns views/iterators here -- confirm.
    """

    def setUp(self):
        # Each test starts from an empty graph.
        self.graph = UndirectedGraph()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, UndirectedGraph)

    def test_class_init_with_data_string(self):
        self.G = UndirectedGraph([("a", "b"), ("b", "c")])
        self.assertListEqual(sorted(self.G.nodes()), ["a", "b", "c"])
        self.assertListEqual(
            hf.recursive_sorted(self.G.edges()), [["a", "b"], ["b", "c"]]
        )

    def test_add_node_string(self):
        self.graph.add_node("a")
        # assertListEqual insists on a real list on both sides.
        self.assertListEqual(list(self.graph.nodes()), ["a"])

    def test_add_node_nonstring(self):
        self.graph.add_node(1)
        self.assertListEqual(list(self.graph.nodes()), [1])

    def test_add_nodes_from_string(self):
        self.graph.add_nodes_from(["a", "b", "c", "d"])
        self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c", "d"])

    def test_add_nodes_from_non_string(self):
        # Smoke test: only checks that non-string nodes are accepted.
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.graph.add_edge("d", "e")
        self.assertListEqual(sorted(self.graph.nodes()), ["d", "e"])
        self.assertListEqual(
            hf.recursive_sorted(self.graph.edges()), [["d", "e"]]
        )
        self.graph.add_nodes_from(["a", "b", "c"])
        self.graph.add_edge("a", "b")
        self.assertListEqual(
            hf.recursive_sorted(self.graph.edges()), [["a", "b"], ["d", "e"]]
        )

    def test_add_edge_nonstring(self):
        # Smoke test: only checks that non-string endpoints are accepted.
        self.graph.add_edge(1, 2)

    def test_add_edges_from_string(self):
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c"])
        self.assertListEqual(
            hf.recursive_sorted(self.graph.edges()), [["a", "b"], ["b", "c"]]
        )
        self.graph.add_nodes_from(["d", "e", "f"])
        self.graph.add_edges_from([("d", "e"), ("e", "f")])
        self.assertListEqual(
            sorted(self.graph.nodes()), ["a", "b", "c", "d", "e", "f"]
        )
        self.assertListEqual(
            hf.recursive_sorted(self.graph.edges()),
            hf.recursive_sorted([("a", "b"), ("b", "c"), ("d", "e"), ("e", "f")]),
        )

    def test_add_edges_from_nonstring(self):
        # Smoke test: only checks that non-string edges are accepted.
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_number_of_neighbors(self):
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        # len() needs a sized container, so materialise the neighbours first.
        self.assertEqual(len(list(self.graph.neighbors("b"))), 2)

    def tearDown(self):
        del self.graph
def moralize(self):
    """
    Removes all the immoralities in the DirectedGraph and creates a moral
    graph (UndirectedGraph).

    A v-structure X->Z<-Y is an immorality if there is no directed edge
    between X and Y.

    Every node of this graph is carried over into the result, including
    nodes that have no incident edge.

    Returns
    -------
    UndirectedGraph
        The undirected edges of this graph plus one edge between every
        pair of parents that share a child.

    Examples
    --------
    >>> from pgmpy.base import DirectedGraph
    >>> G = DirectedGraph(ebunch=[('diff', 'grade'), ('intel', 'grade')])
    >>> moral_graph = G.moralize()
    >>> moral_graph.edges()
    [('intel', 'grade'), ('intel', 'diff'), ('grade', 'diff')]
    """
    # Seed the graph with the nodes first: constructing it from the edge
    # list alone would lose every node that has no incident edge.
    moral_graph = UndirectedGraph()
    moral_graph.add_nodes_from(self.nodes())
    moral_graph.add_edges_from(self.to_undirected().edges())

    # "Marry" the parents: connect every pair of parents of a common child.
    for node in self.nodes():
        moral_graph.add_edges_from(
            itertools.combinations(self.get_parents(node), 2))

    return moral_graph
def mmpc(self, significance_level=0.01):
    """
    Estimates a graph skeleton (UndirectedGraph) over the variables in
    ``self.state_names`` using a max-min parents-and-children search.

    For every node a candidate neighbour list is grown greedily (forward
    phase), pruned with conditional independence tests (backward phase),
    and finally made symmetric: an edge is kept only when both endpoints
    list each other.

    Parameters
    ----------
    significance_level: float, default: 0.01
        NOTE(review): this value is not referenced anywhere in the body;
        the backward phase relies solely on the truthiness of
        ``self.test_conditional_independence(...)``. Confirm whether the
        threshold is meant to be forwarded to that call.

    Returns
    -------
    UndirectedGraph
        Contains every node; edges connect mutually accepted neighbours.
    """
    nodes = self.state_names.keys()

    def assoc(X, Y, Zs):
        """Association of X and Y given Zs: one minus element [1] of the
        ``chi_square`` result (presumably the p-value -- confirm)."""
        return 1 - chi_square(X, Y, Zs, self.data)[1]

    def min_assoc(X, Y, Zs):
        """Minimal association of X, Y given any subset of Zs."""
        return min(assoc(X, Y, Zs_subset) for Zs_subset in powerset(Zs))

    def max_min_heuristic(X, Zs):
        """Finds the variable outside ``Zs`` (and different from ``X``)
        that maximizes ``min_assoc`` with ``X`` relative to ``Zs``.

        Returns ``(None, 0)`` when no candidate is left.
        """
        max_min_assoc = 0
        best_Y = None

        for Y in set(nodes) - set(Zs + [X]):
            min_assoc_val = min_assoc(X, Y, Zs)
            if min_assoc_val >= max_min_assoc:
                best_Y = Y
                max_min_assoc = min_assoc_val

        return (best_Y, max_min_assoc)

    # Find parents and children for each node
    neighbors = dict()
    for node in nodes:
        neighbors[node] = []

        # Forward Phase: keep adding the best candidate while it still
        # shows a strictly positive association.
        while True:
            new_neighbor, new_neighbor_min_assoc = max_min_heuristic(
                node, neighbors[node])
            if new_neighbor_min_assoc > 0:
                neighbors[node].append(new_neighbor)
            else:
                break

        # Backward Phase: iterate over a snapshot, because removing from
        # the list being iterated would skip the element after each
        # removal. `other_neighbors` is still taken from the live list so
        # already-pruned candidates are not used for conditioning.
        for neigh in list(neighbors[node]):
            other_neighbors = [n for n in neighbors[node] if n != neigh]
            for sep_set in powerset(other_neighbors):
                if self.test_conditional_independence(
                        node, neigh, sep_set):
                    neighbors[node].remove(neigh)
                    break

    # correct for false positives: drop one-directional neighbour
    # relations (again iterating over a snapshot while removing).
    for node in nodes:
        for neigh in list(neighbors[node]):
            if node not in neighbors[neigh]:
                neighbors[node].remove(neigh)

    skel = UndirectedGraph()
    skel.add_nodes_from(nodes)
    for node in nodes:
        skel.add_edges_from([(node, neigh) for neigh in neighbors[node]])

    return skel
class TestUndirectedGraphCreation(unittest.TestCase):
    """Construction, node/edge insertion and node-weight tests for
    ``UndirectedGraph``.

    ``nodes()`` and ``neighbors()`` results are wrapped in ``list(...)``
    before list-only assertions / ``len`` are applied, and node attributes
    are read through ``graph.nodes[...]``.
    NOTE(review): presumably UndirectedGraph is networkx-backed;
    ``graph.nodes[...]`` needs networkx >= 2.0, where the older
    ``graph.node`` mapping is deprecated and later removed -- confirm the
    supported networkx range.
    """

    def setUp(self):
        # Each test starts from an empty graph.
        self.graph = UndirectedGraph()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, UndirectedGraph)

    def test_class_init_with_data_string(self):
        self.G = UndirectedGraph([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_add_node_string(self):
        self.graph.add_node('a')
        # assertListEqual insists on a real list on both sides.
        self.assertListEqual(list(self.graph.nodes()), ['a'])

    def test_add_node_nonstring(self):
        self.graph.add_node(1)
        self.assertListEqual(list(self.graph.nodes()), [1])

    def test_add_nodes_from_string(self):
        self.graph.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.graph.nodes()),
                             ['a', 'b', 'c', 'd'])

    def test_add_node_with_weight(self):
        self.graph.add_node('a')
        self.graph.add_node('weight_a', weight=0.3)
        self.assertEqual(self.graph.nodes['weight_a']['weight'], 0.3)
        # A node added without a weight is expected to carry weight None.
        self.assertEqual(self.graph.nodes['a']['weight'], None)

    def test_add_nodes_from_with_weight(self):
        self.graph.add_node(1)
        self.graph.add_nodes_from(['weight_b', 'weight_c'],
                                  weights=[0.3, 0.5])
        self.assertEqual(self.graph.nodes['weight_b']['weight'], 0.3)
        self.assertEqual(self.graph.nodes['weight_c']['weight'], 0.5)
        self.assertEqual(self.graph.nodes[1]['weight'], None)

    def test_add_nodes_from_non_string(self):
        # Smoke test: only checks that non-string nodes are accepted.
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.graph.add_edge('d', 'e')
        self.assertListEqual(sorted(self.graph.nodes()), ['d', 'e'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['d', 'e']])
        self.graph.add_nodes_from(['a', 'b', 'c'])
        self.graph.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        # Smoke test: only checks that non-string endpoints are accepted.
        self.graph.add_edge(1, 2)

    def test_add_edges_from_string(self):
        self.graph.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.graph.add_nodes_from(['d', 'e', 'f'])
        self.graph.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(
            hf.recursive_sorted(self.graph.edges()),
            hf.recursive_sorted([('a', 'b'), ('b', 'c'),
                                 ('d', 'e'), ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        # Smoke test: only checks that non-string edges are accepted.
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_number_of_neighbors(self):
        self.graph.add_edges_from([('a', 'b'), ('b', 'c')])
        # len() needs a sized container, so materialise the neighbours first.
        self.assertEqual(len(list(self.graph.neighbors('b'))), 2)

    def tearDown(self):
        del self.graph
class TestUndirectedGraphCreation(unittest.TestCase):
    """Construction and node/edge insertion tests for ``UndirectedGraph``.

    ``nodes()`` and ``neighbors()`` results are wrapped in ``list(...)``
    before list-only assertions / ``len`` are applied, so the tests do not
    depend on those calls returning a concrete list.
    NOTE(review): presumably UndirectedGraph is networkx-backed, where
    2.x returns views/iterators here -- confirm.
    """

    def setUp(self):
        # Each test starts from an empty graph.
        self.graph = UndirectedGraph()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, UndirectedGraph)

    def test_class_init_with_data_string(self):
        self.G = UndirectedGraph([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_add_node_string(self):
        self.graph.add_node('a')
        # assertListEqual insists on a real list on both sides.
        self.assertListEqual(list(self.graph.nodes()), ['a'])

    def test_add_node_nonstring(self):
        self.graph.add_node(1)
        self.assertListEqual(list(self.graph.nodes()), [1])

    def test_add_nodes_from_string(self):
        self.graph.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.graph.nodes()),
                             ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        # Smoke test: only checks that non-string nodes are accepted.
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.graph.add_edge('d', 'e')
        self.assertListEqual(sorted(self.graph.nodes()), ['d', 'e'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['d', 'e']])
        self.graph.add_nodes_from(['a', 'b', 'c'])
        self.graph.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        # Smoke test: only checks that non-string endpoints are accepted.
        self.graph.add_edge(1, 2)

    def test_add_edges_from_string(self):
        self.graph.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.graph.add_nodes_from(['d', 'e', 'f'])
        self.graph.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(
            hf.recursive_sorted(self.graph.edges()),
            hf.recursive_sorted([('a', 'b'), ('b', 'c'),
                                 ('d', 'e'), ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        # Smoke test: only checks that non-string edges are accepted.
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_number_of_neighbors(self):
        self.graph.add_edges_from([('a', 'b'), ('b', 'c')])
        # len() needs a sized container, so materialise the neighbours first.
        self.assertEqual(len(list(self.graph.neighbors('b'))), 2)

    def tearDown(self):
        del self.graph
class TestUndirectedGraphCreation(unittest.TestCase):
    """Checks that an ``UndirectedGraph`` can be built and that nodes,
    weighted nodes and edges can be added to it."""

    def setUp(self):
        """Give every test its own empty graph."""
        self.graph = UndirectedGraph()

    def tearDown(self):
        """Drop the per-test graph."""
        del self.graph

    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, UndirectedGraph)

    def test_class_init_with_data_string(self):
        self.G = UndirectedGraph([("a", "b"), ("b", "c")])

        node_names = sorted(self.G.nodes())
        self.assertListEqual(node_names, ["a", "b", "c"])

        edge_pairs = hf.recursive_sorted(self.G.edges())
        self.assertListEqual(edge_pairs, [["a", "b"], ["b", "c"]])

    def test_add_node_string(self):
        self.graph.add_node("a")
        present = list(self.graph.nodes())
        self.assertListEqual(present, ["a"])

    def test_add_node_nonstring(self):
        self.graph.add_node(1)
        present = list(self.graph.nodes())
        self.assertListEqual(present, [1])

    def test_add_nodes_from_string(self):
        self.graph.add_nodes_from(["a", "b", "c", "d"])
        present = sorted(self.graph.nodes())
        self.assertListEqual(present, ["a", "b", "c", "d"])

    def test_add_node_with_weight(self):
        self.graph.add_node("a")
        self.graph.add_node("weight_a", weight=0.3)

        weighted_attrs = self.graph.nodes["weight_a"]
        self.assertEqual(weighted_attrs["weight"], 0.3)

        # A node added without an explicit weight reports weight None.
        plain_attrs = self.graph.nodes["a"]
        self.assertEqual(plain_attrs["weight"], None)

    def test_add_nodes_from_with_weight(self):
        self.graph.add_node(1)
        self.graph.add_nodes_from(["weight_b", "weight_c"], weights=[0.3, 0.5])

        self.assertEqual(self.graph.nodes["weight_b"]["weight"], 0.3)
        self.assertEqual(self.graph.nodes["weight_c"]["weight"], 0.5)
        self.assertEqual(self.graph.nodes[1]["weight"], None)

    def test_add_nodes_from_non_string(self):
        # Only verifies that the call is accepted.
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.graph.add_edge("d", "e")

        present = sorted(self.graph.nodes())
        self.assertListEqual(present, ["d", "e"])
        edge_pairs = hf.recursive_sorted(self.graph.edges())
        self.assertListEqual(edge_pairs, [["d", "e"]])

        # A second edge between freshly added nodes joins the first one.
        self.graph.add_nodes_from(["a", "b", "c"])
        self.graph.add_edge("a", "b")
        edge_pairs = hf.recursive_sorted(self.graph.edges())
        self.assertListEqual(edge_pairs, [["a", "b"], ["d", "e"]])

    def test_add_edge_nonstring(self):
        # Only verifies that the call is accepted.
        self.graph.add_edge(1, 2)

    def test_add_edges_from_string(self):
        self.graph.add_edges_from([("a", "b"), ("b", "c")])

        present = sorted(self.graph.nodes())
        self.assertListEqual(present, ["a", "b", "c"])
        edge_pairs = hf.recursive_sorted(self.graph.edges())
        self.assertListEqual(edge_pairs, [["a", "b"], ["b", "c"]])

        self.graph.add_nodes_from(["d", "e", "f"])
        self.graph.add_edges_from([("d", "e"), ("e", "f")])

        present = sorted(self.graph.nodes())
        self.assertListEqual(present, ["a", "b", "c", "d", "e", "f"])
        edge_pairs = hf.recursive_sorted(self.graph.edges())
        wanted_pairs = hf.recursive_sorted(
            [("a", "b"), ("b", "c"), ("d", "e"), ("e", "f")]
        )
        self.assertListEqual(edge_pairs, wanted_pairs)

    def test_add_edges_from_nonstring(self):
        # Only verifies that the call is accepted.
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_number_of_neighbors(self):
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        adjacent = list(self.graph.neighbors("b"))
        self.assertEqual(len(adjacent), 2)
def mmpc(self, data, nodes):
    """
    Estimates a graph skeleton (UndirectedGraph) for the data set, using
    the MMPC (max-min parents-and-children) algorithm.

    Independence test results are memoised in ``self.p_val_cache`` under
    the key ``(tuple(sorted([X, Y])), tuple(sorted(Zs)))`` with the value
    ``(p_value, sufficient_data)``. Conditioning sets are limited to at
    most ``self.max_reach`` variables, and ``self.alpha`` is the p-value
    threshold for declaring independence.

    :param data: data set handed to ``BaseEstimator``
    :param nodes: the variables among which neighbours are searched
    :return: graph skeleton
    """

    def cached_test(X, Y, Zs, cb_estimator):
        """
        Returns ``(p_value, sufficient_data)`` for the test of
        X _|_ Y | Zs, running the estimator's test only on a cache miss.
        """
        key = (tuple(sorted([X, Y])), tuple(sorted(Zs)))
        if key in self.p_val_cache:
            p_value, sufficient_data = self.p_val_cache.get(key)
        else:
            chi2, p_value, sufficient_data = \
                cb_estimator.test_conditional_independence(X, Y, Zs)
            self.p_val_cache.update({key: (p_value, sufficient_data)})
        return p_value, sufficient_data

    def is_independent(X, Y, Zs, cb_estimator):
        """
        Returns result of hypothesis test for the null hypothesis that
        X _|_ Y | Zs, using a chi2 statistic and threshold `self.alpha`.
        """
        p_value, sufficient_data = cached_test(X, Y, Zs, cb_estimator)
        return p_value >= self.alpha and sufficient_data

    def assoc(X, Y, Zs, cb_estimator):
        """
        Measure for (conditional) association between variables:
        one minus the p-value of the independence test.
        """
        p_value, _ = cached_test(X, Y, Zs, cb_estimator)
        return 1 - p_value

    def min_assoc(X, Y, Zs, cb_estimator):
        """
        Minimal association of X, Y given any subset of Zs with at most
        `self.max_reach` elements (infinity if there is no such subset).
        """
        largest = min(self.max_reach, len(Zs))
        return min(
            (assoc(X, Y, Zs_subset, cb_estimator)
             for size in range(largest + 1)
             for Zs_subset in combinations(Zs, size)),
            default=float('inf'))

    def max_min_heuristic(X, Zs):
        """
        Finds the variable outside `Zs` (and different from `X`) that
        maximizes min_assoc with `X` relative to `Zs`.
        Returns `(None, 0)` when no candidate is left.
        """
        max_min_assoc = 0
        best_Y = None

        for Y in set(nodes) - set(Zs + [X]):
            min_assoc_val = min_assoc(X, Y, Zs, cb_estimator)
            if min_assoc_val >= max_min_assoc:
                best_Y = Y
                max_min_assoc = min_assoc_val

        return best_Y, max_min_assoc

    cb_estimator = BaseEstimator(data=data, complete_samples_only=False)

    # Find parents and children for each node
    neighbors = dict()
    for node in nodes:
        neighbors[node] = []

        # Forward Phase: keep adding the best candidate while it still
        # shows a strictly positive association.
        while True:
            new_neighbor, new_neighbor_min_assoc = max_min_heuristic(
                node, neighbors[node])
            if new_neighbor_min_assoc > 0:
                neighbors[node].append(new_neighbor)
            else:
                break

        # Backward Phase: iterate over a snapshot, because removing from
        # the list being iterated would skip the element after each
        # removal. `other_neighbors` is still taken from the live list so
        # already-pruned candidates are not used for conditioning.
        for neigh in list(neighbors[node]):
            other_neighbors = [n for n in neighbors[node] if n != neigh]
            largest = min(self.max_reach, len(other_neighbors))
            for sep_set_size in range(largest + 1):
                if any(is_independent(node, neigh, sep_set, cb_estimator)
                       for sep_set in combinations(other_neighbors,
                                                   sep_set_size)):
                    neighbors[node].remove(neigh)
                    break

    # correct for false positives: drop one-directional neighbour
    # relations (again iterating over a snapshot while removing).
    for node in nodes:
        for neigh in list(neighbors[node]):
            if node not in neighbors[neigh]:
                neighbors[node].remove(neigh)

    skel = UndirectedGraph()
    skel.add_nodes_from(nodes)
    for node in nodes:
        skel.add_edges_from([(node, neigh) for neigh in neighbors[node]])

    return skel