def train_model() -> StructureModel:
    """
    Build the reference structure that every test and fixture adheres to.

    Node roles:
        - cause-only nodes: d, e
        - effect-only nodes: a, c
        - cause and effect node: b

    Layout::

            d
         ↙  ↓  ↘
        a ← b → c
            ↑  ↗
            e

    Returns:
        A StructureModel holding the seven directed edges drawn above.
    """
    # (cause, effect) pairs; order is kept so node insertion order is stable.
    directed_edges = [
        ("b", "a"),
        ("b", "c"),
        ("d", "a"),
        ("d", "c"),
        ("d", "b"),
        ("e", "c"),
        ("e", "b"),
    ]

    structure = StructureModel()
    structure.add_edges_from(directed_edges)
    return structure
def _matrices_to_structure_model(
    w_est: np.ndarray, a_est: np.ndarray
) -> StructureModel:
    """
    Convert the two matrices output by dynotears (W and A) into a StructureModel.

    Node naming convention: ``{var}_lag{l}``, where ``var`` is the column
    index of the variable and ``l`` is the lag, i.e. how many timestamps back
    the edge originates. Intra-slice nodes have ``l == 0``.

    Args:
        w_est: Intra-slice weight matrix
        a_est: Inter-slice matrix

    Returns:
        StructureModel representing the structure learnt
    """
    sm = StructureModel()

    # One node per (lag, variable) pair, lag-major: all lag-0 names first.
    node_names = []
    for lag in range(1 + (a_est.shape[0] // a_est.shape[1])):
        for var_idx in range(a_est.shape[1]):
            node_names.append(f"{var_idx}_lag{lag}")
    sm.add_nodes_from(node_names)

    # Intra-slice edges: every non-zero entry of W links two lag-0 nodes.
    intra_edges = []
    for src in range(w_est.shape[0]):
        for dst in range(w_est.shape[1]):
            if w_est[src, dst] != 0:
                intra_edges.append(
                    (node_names[src], node_names[dst], {"weight": w_est[src, dst]})
                )
    sm.add_edges_from(intra_edges)

    # Inter-slice edges: rows of A are offset past the lag-0 block of names.
    inter_edges = []
    for src in range(a_est.shape[0]):
        for dst in range(a_est.shape[1]):
            if a_est[src, dst] != 0:
                inter_edges.append(
                    (
                        node_names[src + w_est.shape[0]],
                        node_names[dst],
                        {"weight": a_est[src, dst]},
                    )
                )
    sm.add_edges_from(inter_edges)

    return sm
def make_default_scm(radiomic_features=None):
    """
    Build the default structural causal model as a StructureModel.

    Args:
        radiomic_features: optional iterable of radiomic feature names. When
            given, 'Age' points to 'ovarian_status' and to every feature, and
            every feature points to 'response'. When None, only the clinical
            edges are added.

    Returns:
        StructureModel containing the resulting edges.
    """
    sm = StructureModel()

    # Clinical parent -> children edges present in both configurations.
    clinical_children = {
        'biopsy_grade': ['response'],
        'subtypes': ['response'],
        'histology': ['response'],
        'clinical_nodal_status': ['stage'],
        'stage': ['response'],
    }

    if radiomic_features is None:
        children_by_parent = clinical_children
    else:
        # Key order matters: 'Age' first, then clinical, then radiomic nodes.
        children_by_parent = {
            'Age': ['ovarian_status', *radiomic_features],
            **clinical_children,
            **{feature: ['response'] for feature in radiomic_features},
        }

    edge_pairs = []
    for parent, children in children_by_parent.items():
        for child in children:
            edge_pairs.append((parent, child))

    sm.add_edges_from(edge_pairs)
    return sm
def chain_network() -> BayesianNetwork:
    """
    Build a fitted Bayesian network over a simple chain, used to test
    do-interventions that split the graph into subgraphs.

        a → b → c → d → e

    The network is fitted on 50 rows of random binary data.
    """
    sample_count = 50
    node_labels = list("abcde")

    # Draw integers in [0, 10) and threshold them into 0/1 observations.
    draws = np.random.randint(10, size=(sample_count, len(node_labels)))
    binary_samples = (draws > 6).astype(int)
    frame = pd.DataFrame(data=binary_samples, columns=node_labels)

    # Pair each node with its successor: (a, b), (b, c), (c, d), (d, e).
    chain_structure = StructureModel()
    chain_structure.add_edges_from(list(zip(node_labels, node_labels[1:])))

    network = BayesianNetwork(chain_structure)
    network = network.fit_node_states(frame)
    return network.fit_cpds(frame, method="BayesianEstimator", bayes_prior="K2")
def test_add_edges_from_other(self):
    """Adding edges with an origin of "other" should raise a ValueError"""
    model = StructureModel()
    candidate_edges = [(1, 2)]
    expected_error = "^Unknown origin: must be one of.*$"

    with pytest.raises(ValueError, match=expected_error):
        model.add_edges_from(candidate_edges, "other")
def test_instance(self):
    """The object returned by get_target_subgraph should be a StructureModel"""
    graph_edges = [(0, 1), (1, 2), (1, 3), (4, 6)]

    model = StructureModel()
    model.add_edges_from(graph_edges)

    result = model.get_target_subgraph(2)
    assert isinstance(result, StructureModel)
def test_add_edges_from_expert(self):
    """Edges added with the expert origin should be labelled as expert origin"""
    expert_edges = [(1, 2), (2, 3)]

    model = StructureModel()
    model.add_edges_from(expert_edges, "expert")

    # Every edge must exist ...
    for edge in expert_edges:
        assert edge in model.edges
    # ... and carry the "expert" origin attribute.
    for source, target in expert_edges:
        assert (source, target, "expert") in model.edges.data("origin")
def test_add_edges_from_default(self):
    """Edges added without an explicit origin should be labelled "unknown\""""
    plain_edges = [(1, 2), (2, 3)]

    model = StructureModel()
    model.add_edges_from(plain_edges)

    missing = [edge for edge in plain_edges if edge not in model.edges]
    assert not missing

    mislabelled = [
        (source, target)
        for source, target in plain_edges
        if (source, target, "unknown") not in model.edges.data("origin")
    ]
    assert not mislabelled
def test_add_edges_from_custom_attr(self):
    """Keyword arguments to add_edges_from should be stored as edge attributes"""
    attributed_edges = [(1, 2), (2, 3)]

    model = StructureModel()
    model.add_edges_from(attributed_edges, x="Y")

    for edge in attributed_edges:
        assert edge in model.edges
    # The custom attribute "x" must be readable back with the value "Y".
    for source, target in attributed_edges:
        assert (source, target, "Y") in model.edges.data("x")
def test_add_edges_from_multiple_times(self):
    """Re-adding the same edges should overwrite their origin attribute"""
    repeated_edges = [(1, 2), (2, 3)]
    model = StructureModel()

    # Add the same edges twice; after each pass the latest origin must win.
    for origin in ("unknown", "learned"):
        model.add_edges_from(repeated_edges, origin)
        assert all(
            (source, target, origin) in model.edges.data("origin")
            for source, target in repeated_edges
        )
def test_to_undirected(self):
    """to_undirected should collapse the directed edges into an undirected graph"""
    model = StructureModel()
    model.add_edges_from([(1, 2), (2, 1), (2, 3), (3, 4)])

    undirected = model.to_undirected()

    for edge in [(2, 3), (3, 4)]:
        assert edge in undirected.edges
    # The 1 <-> 2 pair may be reported in either orientation.
    assert (1, 2) in undirected.edges or (2, 1) in undirected.edges
    # Four directed edges become three undirected ones.
    assert len(undirected.edges) == 3
def test_to_directed(self):
    """to_directed should return a StructureModel that keeps every edge"""
    directed_edges = [(1, 2), (2, 1), (2, 3), (3, 4)]

    model = StructureModel()
    model.add_edges_from(directed_edges)
    result = model.to_directed()

    assert isinstance(result, StructureModel)
    for edge in directed_edges:
        assert edge in result.edges
def test_get_subgraph_string(self, target_node, test_input, expected):
    """get_target_subgraph should return the subgraph containing the given node"""
    source_graph = StructureModel()
    source_graph.add_edges_from(test_input)
    actual = source_graph.get_target_subgraph(target_node)

    reference = StructureModel()
    reference.add_edges_from(expected)

    # Compare as sets: ordering of nodes/edges is not part of the contract.
    actual_nodes, reference_nodes = set(actual.nodes), set(reference.nodes)
    assert actual_nodes == reference_nodes
    actual_edges, reference_edges = set(actual.edges), set(reference.edges)
    assert actual_edges == reference_edges
def test_node_not_in_graph(self, target_node, test_input):
    """get_target_subgraph should raise NodeNotFound for a node absent from the graph"""
    model = StructureModel()
    model.add_edges_from(test_input)

    # pytest treats ``match`` as a regular expression searched in the message.
    expected_message = "Node {node} not found in the graph".format(node=target_node)

    with pytest.raises(NodeNotFound, match=expected_message):
        model.get_target_subgraph(target_node)
def test_get_largest_subgraph(self, test_input, expected):
    """get_largest_subgraph should return the expected nodes and edges"""
    source_graph = StructureModel()
    source_graph.add_edges_from(test_input)
    actual = source_graph.get_largest_subgraph()

    reference = StructureModel()
    reference.add_edges_from(expected)

    # Compare nodes first, then edges, both as unordered sets.
    for view_name in ("nodes", "edges"):
        assert set(getattr(actual, view_name)) == set(getattr(reference, view_name))
def test_add_multiple_edges(self):
    """Edges added in separate calls should each keep their own origin"""
    edges_with_origin = [
        ((1, 2), "unknown"),
        ((1, 3), "learned"),
        ((1, 4), "expert"),
    ]

    model = StructureModel()
    for edge, origin in edges_with_origin:
        model.add_edges_from([edge], origin=origin)

    for (source, target), origin in edges_with_origin:
        assert (source, target, origin) in model.edges.data("origin")
def test_node_not_in_graph(self, target_node, test_input):
    """get_markov_blanket should raise NodeNotFound for a node absent from the graph"""
    model = StructureModel()
    model.add_edges_from(test_input)

    # pytest treats ``match`` as a regular expression searched in the message.
    expected_message = f"Node {target_node} not found in the graph"

    with pytest.raises(NodeNotFound, match=expected_message):
        model.get_markov_blanket(target_node)
def test_get_markov_blanket_multiple(self, target_nodes, test_input, expected):
    """get_markov_blanket should accept a list of nodes and return their blanket"""
    source_graph = StructureModel()
    source_graph.add_edges_from(test_input)
    actual = source_graph.get_markov_blanket(target_nodes)

    reference = StructureModel()
    reference.add_edges_from(expected)

    # Compare as sets: ordering of nodes/edges is not part of the contract.
    actual_nodes, reference_nodes = set(actual.nodes), set(reference.nodes)
    assert actual_nodes == reference_nodes
    actual_edges, reference_edges = set(actual.edges), set(reference.edges)
    assert actual_edges == reference_edges
def test_more_than_one_largest(self):
    """When several subgraphs tie for largest, the first one should be returned"""
    # Two components of equal size: {0, 1, 2} and {3, 4, 5}.
    tied_edges = [(0, 1), (1, 2), (3, 4), (3, 5)]
    first_component_edges = [(0, 1), (1, 2)]

    model = StructureModel()
    model.add_edges_from(tied_edges)
    actual = model.get_largest_subgraph()

    reference = StructureModel()
    reference.add_edges_from(first_component_edges)

    for view_name in ("nodes", "edges"):
        assert set(getattr(actual, view_name)) == set(getattr(reference, view_name))
def test_get_target_subgraph_twice(self):
    """get_target_subgraph should work when called on its own result"""
    model = StructureModel()
    model.add_edges_from([(0, 1), (1, 2), (1, 3), (4, 6)])

    # First extraction, then cut node 0 off and extract again around node 1.
    first_pass = model.get_target_subgraph(0)
    first_pass.remove_edge(0, 1)
    second_pass = first_pass.get_target_subgraph(1)

    reference = StructureModel()
    reference.add_edges_from([(1, 2), (1, 3)])

    actual_nodes, reference_nodes = set(second_pass.nodes), set(reference.nodes)
    assert actual_nodes == reference_nodes
    actual_edges, reference_edges = set(second_pass.edges), set(reference.edges)
    assert actual_edges == reference_edges
def test_isolates_nodes_and_edges(self):
    """The target subgraph should leave out unrelated edges and isolated nodes"""
    model = StructureModel()
    model.add_edges_from([(0, 1), (1, 2), (1, 3), (5, 6), (4, 5)])
    # Nodes with no edges at all; none of them should appear in the result.
    model.add_nodes_from([7, 8, 9])

    actual = model.get_target_subgraph(5)

    reference = StructureModel()
    reference.add_edges_from([(5, 6), (4, 5)])

    for view_name in ("nodes", "edges"):
        assert set(getattr(actual, view_name)) == set(getattr(reference, view_name))
def test_create_inference_with_bad_variable_names_fails(
    self, train_model, train_data_idx
):
    """
    Creating an InferenceEngine from a network whose node "a" has been
    renamed to "$a" should raise a ValueError.

    Args:
        train_model: fixture providing the reference structure; its edges
            are copied with every "a" in a node name replaced by "$a".
        train_data_idx: fixture providing the training data; a renamed copy
            is used so the fixture object itself is left untouched.
    """

    def _with_bad_name(node):
        # Turn the node name "a" into "$a".
        return str(node).replace("a", "$a")

    model = StructureModel()
    model.add_edges_from(
        [(_with_bad_name(u), _with_bad_name(v)) for u, v in train_model.edges]
    )

    # Rename on a copy instead of ``inplace=True`` so the injected fixture
    # object is not mutated and cannot leak the "$a" column elsewhere.
    renamed_data = train_data_idx.rename(columns={"a": "$a"})

    bn = BayesianNetwork(model).fit_node_states(renamed_data)
    bn.fit_cpds(renamed_data)

    with pytest.raises(ValueError, match="Variable names must match.*"):
        InferenceEngine(bn)
def test_isolates_nodes_and_edges(self):
    """The largest subgraph should leave out smaller components and isolated nodes"""
    model = StructureModel()
    model.add_edges_from([(0, 1), (1, 2), (1, 3), (5, 6)])
    # Nodes with no edges at all; none of them should appear in the result.
    model.add_nodes_from([7, 8, 9])

    actual = model.get_largest_subgraph()

    reference = StructureModel()
    reference.add_edges_from([(0, 1), (1, 2), (1, 3)])

    actual_nodes, reference_nodes = set(actual.nodes), set(reference.nodes)
    assert actual_nodes == reference_nodes
    actual_edges, reference_edges = set(actual.edges), set(reference.edges)
    assert actual_edges == reference_edges
import warnings
from causalnex.structure import StructureModel
from causalnex.plots import plot_structure, NODE_STYLE, EDGE_STYLE
# Only needed for the inline rendering that is commented out further down:
# from IPython.display import Image
# import pygraphviz

# Install an "ignore" filter so warnings are not shown while the script runs.
warnings.filterwarnings("ignore")

# Model built from domain knowledge
sm = StructureModel()

# Node pairs linked using domain knowledge, given as (from, to) tuples
causal_relationships = [('health', 'absences'),
                        ('health', 'G1')]

# Add the edges to the model
sm.add_edges_from(causal_relationships)

# Visualization, using the WEAK node and edge style presets
viz = plot_structure(sm,
                     graph_attributes={"scale": "0.5"},
                     all_node_attributes=NODE_STYLE.WEAK,
                     all_edge_attributes=EDGE_STYLE.WEAK)
# Inline alternative (disabled): Image(viz.draw(format='png'))

# Save the visualized object to a file
# NOTE(review): presumably writes 'res.png' to the working directory - confirm
viz.draw('res.png')