class TestDoOperator(unittest.TestCase):
    """Checks the graph returned by ``DAG.do`` for a small example network."""

    def setUp(self):
        # Example network: X -> A, A -> Y, A -> B.
        graph = DAG()
        graph.add_edges_from([("X", "A"), ("A", "Y"), ("A", "B")])
        self.graph = graph

    def test_do(self):
        """``do("A")`` is expected to keep all nodes and only A's outgoing edges."""
        intervened = self.graph.do("A")
        expected_edges = [("A", "B"), ("A", "Y")]

        # The node set must be unchanged by the intervention.
        self.assertEqual(set(intervened.nodes()), set(self.graph.nodes()))
        # The edge X -> A must be gone; edges leaving A must remain.
        self.assertEqual(sorted(intervened.edges()), expected_edges)
def estimate(
    self, start=None, tabu_length=0, max_indegree=None, epsilon=1e-4, max_iter=1e6
):
    """
    Performs local hill climb search to estimate the `DAG` structure
    that has optimal score, according to the scoring method supplied in the constructor.
    Starts at model `start` and proceeds by step-by-step network modifications
    until a local maximum is reached. Only estimates network structure, no parametrization.

    Parameters
    ----------
    start: DAG instance
        The starting point for the local search. By default a completely
        disconnected network is used. Note that a graph passed as `start`
        is modified in place and is the very object that is returned.
    tabu_length: int
        If provided, the last `tabu_length` graph modifications cannot be reversed
        during the search procedure. This serves to enforce a wider exploration
        of the search space. Default value: 0 (the tabu list stays empty).
    max_indegree: int or None
        If provided and unequal None, the procedure only searches among models
        where all nodes have at most `max_indegree` parents. Defaults to None.
    epsilon: float (default: 1e-4)
        Defines the exit condition. If the improvement in score is less than
        `epsilon`, the learned model is returned.
    max_iter: int (default: 1e6)
        The maximum number of iterations allowed. The learned model is returned
        once `max_iter` iterations have been performed.

    Returns
    -------
    model: `DAG` instance
        A `DAG` at a (local) score maximum.

    Raises
    ------
    ValueError
        If `start` is neither None nor a `DAG` over exactly the variables
        of the data set.

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> from pgmpy.estimators import HillClimbSearch, BicScore
    >>> # create data sample with 9 random variables:
    ... data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 9)), columns=list('ABCDEFGHI'))
    >>> # add 10th dependent variable
    ... data['J'] = data['A'] * data['B']
    >>> est = HillClimbSearch(data, scoring_method=BicScore(data))
    >>> best_model = est.estimate()
    >>> sorted(best_model.nodes())
    ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
    >>> best_model.edges()
    [('B', 'J'), ('A', 'J')]
    >>> # search a model with restriction on the number of parents:
    >>> est.estimate(max_indegree=1).edges()
    [('J', 'A'), ('B', 'J')]
    """
    nodes = self.state_names.keys()
    if start is None:
        start = DAG()
        start.add_nodes_from(nodes)
    elif not isinstance(start, DAG) or set(start.nodes()) != set(nodes):
        raise ValueError(
            "'start' should be a DAG with the same variables as the data set, or 'None'."
        )

    tabu_list = []
    current_model = start

    iter_no = 0
    # Strict comparison so that at most `max_iter` modification steps run
    # (`<=` would allow one extra step, e.g. a step even for max_iter=0).
    while iter_no < max_iter:
        iter_no += 1

        # Pick the legal operation with the largest strictly positive gain.
        best_score_delta = 0
        best_operation = None
        for operation, score_delta in self._legal_operations(
            current_model, tabu_list, max_indegree
        ):
            if score_delta > best_score_delta:
                best_operation = operation
                best_score_delta = score_delta

        # Stop at a local maximum or when the gain drops below the threshold.
        if best_operation is None or best_score_delta < epsilon:
            break

        # Apply the operation and remember its inverse at the front of the
        # tabu list; the slice keeps only the `tabu_length` newest entries.
        if best_operation[0] == "+":
            current_model.add_edge(*best_operation[1])
            tabu_list = ([("-", best_operation[1])] + tabu_list)[:tabu_length]
        elif best_operation[0] == "-":
            current_model.remove_edge(*best_operation[1])
            tabu_list = ([("+", best_operation[1])] + tabu_list)[:tabu_length]
        elif best_operation[0] == "flip":
            X, Y = best_operation[1]
            current_model.remove_edge(X, Y)
            current_model.add_edge(Y, X)
            tabu_list = ([best_operation] + tabu_list)[:tabu_length]

    return current_model
class TestDAGCreation(unittest.TestCase):
    """Exercises construction, node/edge insertion and simple queries of ``DAG``."""

    # Edge list shared by the leaf and root queries below.
    _BRANCHING_EDGES = [
        ("A", "B"),
        ("B", "C"),
        ("B", "D"),
        ("D", "E"),
        ("D", "F"),
        ("A", "G"),
    ]

    def setUp(self):
        self.graph = DAG()

    def tearDown(self):
        del self.graph

    def _stored_edge_weight(self, u, v):
        """Return the ``weight`` attribute stored on edge ``u -> v``.

        The lookup goes through ``edge`` when the networkx version string
        starts with "1" and through ``adj`` otherwise.
        """
        if nx.__version__.startswith("1"):
            return self.graph.edge[u][v]["weight"]
        return self.graph.adj[u][v]["weight"]

    def _assert_chain_parents(self):
        """Check the parents of the chain a -> b -> c held in ``self.graph``."""
        for node, parents in (("a", []), ("b", ["a"]), ("c", ["b"])):
            self.assertListEqual(list(self.graph.predecessors(node)), parents)

    # ------------------------------------------------------------------
    # construction
    # ------------------------------------------------------------------
    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, DAG)

    def test_class_init_with_data_string(self):
        self.graph = DAG([("a", "b"), ("b", "c")])
        node_names = sorted(self.graph.nodes())
        edge_pairs = hf.recursive_sorted(self.graph.edges())
        self.assertListEqual(node_names, ["a", "b", "c"])
        self.assertListEqual(edge_pairs, [["a", "b"], ["b", "c"]])

    def test_init_with_cycle(self):
        """Self loops and longer cycles must be rejected with ``ValueError``."""
        cyclic_edge_lists = (
            [("a", "a")],
            [("a", "b"), ("b", "a")],
            [("a", "b"), ("b", "c"), ("c", "a")],
        )
        for edges in cyclic_edge_lists:
            with self.assertRaises(ValueError):
                DAG(edges)

    # ------------------------------------------------------------------
    # nodes
    # ------------------------------------------------------------------
    def test_add_node_string(self):
        self.graph.add_node("a")
        self.assertListEqual(list(self.graph.nodes()), ["a"])

    def test_add_node_nonstring(self):
        # Smoke test: only checks that the call does not raise.
        self.graph.add_node(1)

    def test_add_nodes_from_string(self):
        self.graph.add_nodes_from(["a", "b", "c", "d"])
        self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c", "d"])

    def test_add_nodes_from_non_string(self):
        # Smoke test: only checks that the call does not raise.
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_node_weight(self):
        self.graph.add_node("weighted_a", 0.3)
        self.assertEqual(self.graph.nodes["weighted_a"]["weight"], 0.3)

    def test_add_nodes_from_weight(self):
        self.graph.add_nodes_from(["weighted_b", "weighted_c"], [0.5, 0.6])
        for node, weight in (("weighted_b", 0.5), ("weighted_c", 0.6)):
            self.assertEqual(self.graph.nodes[node]["weight"], weight)

        # Nodes added without weights are expected to carry a weight of None.
        self.graph.add_nodes_from(["e", "f"])
        for node in ("e", "f"):
            self.assertEqual(self.graph.nodes[node]["weight"], None)

    # ------------------------------------------------------------------
    # edges
    # ------------------------------------------------------------------
    def test_add_edge_string(self):
        self.graph.add_edge("d", "e")
        self.assertListEqual(sorted(self.graph.nodes()), ["d", "e"])
        self.assertListEqual(list(self.graph.edges()), [("d", "e")])

        self.graph.add_nodes_from(["a", "b", "c"])
        self.graph.add_edge("a", "b")
        edge_pairs = hf.recursive_sorted(self.graph.edges())
        self.assertListEqual(edge_pairs, [["a", "b"], ["d", "e"]])

    def test_add_edge_nonstring(self):
        # Smoke test: only checks that the call does not raise.
        self.graph.add_edge(1, 2)

    def test_add_edges_from_string(self):
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        self.assertListEqual(sorted(self.graph.nodes()), ["a", "b", "c"])
        self.assertListEqual(
            hf.recursive_sorted(self.graph.edges()), [["a", "b"], ["b", "c"]]
        )

        self.graph.add_nodes_from(["d", "e", "f"])
        self.graph.add_edges_from([("d", "e"), ("e", "f")])
        self.assertListEqual(
            sorted(self.graph.nodes()), ["a", "b", "c", "d", "e", "f"]
        )
        all_edges = [("a", "b"), ("b", "c"), ("d", "e"), ("e", "f")]
        self.assertListEqual(
            hf.recursive_sorted(self.graph.edges()),
            hf.recursive_sorted(all_edges),
        )

    def test_add_edges_from_nonstring(self):
        # Smoke test: only checks that the call does not raise.
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_add_edge_weight(self):
        self.graph.add_edge("a", "b", weight=0.3)
        self.assertEqual(self._stored_edge_weight("a", "b"), 0.3)

    def test_add_edges_from_weight(self):
        self.graph.add_edges_from([("b", "c"), ("c", "d")], weights=[0.5, 0.6])
        self.assertEqual(self._stored_edge_weight("b", "c"), 0.5)
        self.assertEqual(self._stored_edge_weight("c", "d"), 0.6)

        # Edges added without weights are expected to carry a weight of None.
        self.graph.add_edges_from([("e", "f")])
        self.assertEqual(self._stored_edge_weight("e", "f"), None)

    # ------------------------------------------------------------------
    # parent / leaf / root queries
    # ------------------------------------------------------------------
    def test_update_node_parents_bm_constructor(self):
        self.graph = DAG([("a", "b"), ("b", "c")])
        self._assert_chain_parents()

    def test_update_node_parents(self):
        self.graph.add_nodes_from(["a", "b", "c"])
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        self._assert_chain_parents()

    def test_get_leaves(self):
        self.graph.add_edges_from(list(self._BRANCHING_EDGES))
        expected_leaves = sorted(["C", "G", "E", "F"])
        self.assertEqual(sorted(self.graph.get_leaves()), expected_leaves)

    def test_get_roots(self):
        self.graph.add_edges_from(list(self._BRANCHING_EDGES))
        self.assertEqual(["A"], self.graph.get_roots())

        # A second parentless node must show up as an additional root.
        self.graph.add_edge("H", "G")
        self.assertEqual(sorted(["A", "H"]), sorted(self.graph.get_roots()))