Пример #1
0
def train_model() -> StructureModel:
    """
    Build the reference structure that the tests and fixtures are written against.

    Node roles:
        - d, e act only as causes
        - a, c act only as effects
        - b is both a cause and an effect

            d
         ↙  ↓  ↘
        a ← b → c
            ↑  ↗
            e

    Returns:
        StructureModel holding the seven directed edges drawn above.
    """
    # Each pair is (cause, effect).
    directed_edges = [
        ("b", "a"),
        ("b", "c"),
        ("d", "a"),
        ("d", "c"),
        ("d", "b"),
        ("e", "c"),
        ("e", "b"),
    ]
    structure = StructureModel()
    structure.add_edges_from(directed_edges)
    return structure
Пример #2
0
def _matrices_to_structure_model(w_est: np.ndarray,
                                 a_est: np.ndarray) -> StructureModel:
    """
    Build a StructureModel from the two matrices output by dynotears (W and A).

    Nodes are named ``{var}_lag{l}``, where ``var`` is a column index of
    ``a_est`` and ``l`` is the lag value (i.e. from how many previous
    timestamps the edge is coming). Intra-slice nodes have ``l == 0``.

    Args:
        w_est: Intra-slice weight matrix
        a_est: Inter-slice matrix

    Returns:
        StructureModel representing the structure learnt; every non-zero
        matrix entry becomes an edge carrying that entry as its ``weight``.

    """
    n_vars = a_est.shape[1]
    n_lags = a_est.shape[0] // n_vars

    # Node names are ordered lag-major: all lag-0 nodes, then lag-1, and so on.
    node_names = []
    for lag in range(1 + n_lags):
        for var in range(n_vars):
            node_names.append(f"{var}_lag{lag}")

    # Edges described by W: rows and columns index node_names directly.
    intra_edges = []
    for row in range(w_est.shape[0]):
        for col in range(w_est.shape[1]):
            if w_est[row, col] != 0:
                intra_edges.append(
                    (node_names[row], node_names[col],
                     {"weight": w_est[row, col]}))

    # Edges described by A: the source row is offset by the number of rows in
    # W so that it indexes the node names that follow the first block.
    row_offset = w_est.shape[0]
    inter_edges = []
    for row in range(a_est.shape[0]):
        for col in range(a_est.shape[1]):
            if a_est[row, col] != 0:
                inter_edges.append(
                    (node_names[row + row_offset], node_names[col],
                     {"weight": a_est[row, col]}))

    graph = StructureModel()
    graph.add_nodes_from(node_names)
    graph.add_edges_from(intra_edges)
    graph.add_edges_from(inter_edges)
    return graph
Пример #3
0
def make_default_scm(radiomic_features=None):
    """
    Build the default structure model from a fixed parent -> children mapping.

    Args:
        radiomic_features: optional iterable of feature names. When given,
            'Age' points to 'ovarian_status' and to every feature, and each
            feature points to 'response'. When None, only the clinical
            edges are created.

    Returns:
        StructureModel containing one edge per (parent, child) pair.
    """
    # Edges that are present whether or not radiomic features are supplied.
    clinical = {
        'biopsy_grade': ['response'],
        'subtypes': ['response'],
        'histology': ['response'],
        'clinical_nodal_status': ['stage'],
        'stage': ['response'],
    }

    if radiomic_features is None:
        children_by_parent = clinical
    else:
        # 'Age' goes first so the insertion order of the mapping is kept.
        children_by_parent = {
            'Age': ['ovarian_status', *radiomic_features],
            **clinical,
        }
        for feature in radiomic_features:
            children_by_parent[feature] = ['response']

    # Flatten the mapping into (parent, child) pairs.
    edge_pairs = []
    for parent, children in children_by_parent.items():
        edge_pairs.extend((parent, child) for child in children)

    scm = StructureModel()
    scm.add_edges_from(edge_pairs)
    return scm
Пример #4
0
def chain_network() -> BayesianNetwork:
    """
    Fitted Bayesian Network over a five-node chain, used to test do
    interventions that split the graph into subgraphs.

    a → b → c → d → e

    Node states and CPDs are fitted on 50 rows of binary data drawn with
    ``np.random.randint``; a cell is 1 when the drawn digit (0-9) exceeds 6.
    """
    names = list("abcde")
    row_count = 50

    draws = np.random.randint(10, size=(row_count, len(names)))
    data = pd.DataFrame(data=(draws > 6).astype(int), columns=names)

    # Link every node to its successor: (a, b), (b, c), (c, d), (d, e).
    chain = StructureModel()
    chain.add_edges_from(list(zip(names, names[1:])))

    return (BayesianNetwork(chain)
            .fit_node_states(data)
            .fit_cpds(data, method="BayesianEstimator", bayes_prior="K2"))
Пример #5
0
    def test_add_edges_from_other(self):
        """adding edges with an unrecognised origin should raise a ValueError"""

        model = StructureModel()
        unsupported_origin = "other"

        with pytest.raises(ValueError, match="^Unknown origin: must be one of.*$"):
            model.add_edges_from([(1, 2)], unsupported_origin)
Пример #6
0
    def test_instance(self):
        """get_target_subgraph should hand back a StructureModel instance"""
        edges = [(0, 1), (1, 2), (1, 3), (4, 6)]
        model = StructureModel()
        model.add_edges_from(edges)

        result = model.get_target_subgraph(2)

        assert isinstance(result, StructureModel)
Пример #7
0
    def test_add_edges_from_expert(self):
        """edges added with the expert origin should carry origin == "expert" """

        expert_edges = [(1, 2), (2, 3)]
        model = StructureModel()
        model.add_edges_from(expert_edges, "expert")

        for edge in expert_edges:
            assert edge in model.edges
        for source, target in expert_edges:
            assert (source, target, "expert") in model.edges.data("origin")
Пример #8
0
    def test_add_edges_from_default(self):
        """edges added without an explicit origin should be labelled "unknown" """

        default_edges = [(1, 2), (2, 3)]
        model = StructureModel()
        model.add_edges_from(default_edges)

        for edge in default_edges:
            assert edge in model.edges
        for source, target in default_edges:
            assert (source, target, "unknown") in model.edges.data("origin")
Пример #9
0
    def test_add_edges_from_custom_attr(self):
        """a custom keyword attribute should be stored on every added edge"""

        attributed_edges = [(1, 2), (2, 3)]
        model = StructureModel()
        model.add_edges_from(attributed_edges, x="Y")

        for edge in attributed_edges:
            assert edge in model.edges
        for source, target in attributed_edges:
            assert (source, target, "Y") in model.edges.data("x")
Пример #10
0
    def test_add_edges_from_multiple_times(self):
        """re-adding existing edges should overwrite their origin attribute"""

        repeated_edges = [(1, 2), (2, 3)]
        model = StructureModel()

        # First pass labels the edges "unknown", second pass relabels them.
        model.add_edges_from(repeated_edges, "unknown")
        for source, target in repeated_edges:
            assert (source, target, "unknown") in model.edges.data("origin")

        model.add_edges_from(repeated_edges, "learned")
        for source, target in repeated_edges:
            assert (source, target, "learned") in model.edges.data("origin")
Пример #11
0
    def test_to_undirected(self):
        """to_undirected should collapse the 1<->2 pair into a single edge"""

        model = StructureModel()
        model.add_edges_from([(1, 2), (2, 1), (2, 3), (3, 4)])

        undirected = model.to_undirected()

        for edge in [(2, 3), (3, 4)]:
            assert edge in undirected.edges
        # The bidirectional pair may be reported in either orientation.
        assert (1, 2) in undirected.edges or (2, 1) in undirected.edges
        assert len(undirected.edges) == 3
Пример #12
0
    def test_to_directed(self):
        """to_directed should return a StructureModel that keeps every edge"""

        directed_edges = [(1, 2), (2, 1), (2, 3), (3, 4)]
        model = StructureModel()
        model.add_edges_from(directed_edges)

        result = model.to_directed()

        assert isinstance(result, StructureModel)
        for edge in directed_edges:
            assert edge in result.edges
Пример #13
0
    def test_get_subgraph_string(self, target_node, test_input, expected):
        """get_target_subgraph should match the graph built from the expected edges"""
        reference = StructureModel()
        reference.add_edges_from(expected)

        model = StructureModel()
        model.add_edges_from(test_input)
        result = model.get_target_subgraph(target_node)

        assert set(result.nodes) == set(reference.nodes)
        assert set(result.edges) == set(reference.edges)
Пример #14
0
    def test_node_not_in_graph(self, target_node, test_input):
        """get_target_subgraph should raise NodeNotFound for a node missing from the graph"""

        model = StructureModel()
        model.add_edges_from(test_input)
        expected_message = f"Node {target_node} not found in the graph"

        with pytest.raises(NodeNotFound, match=expected_message):
            model.get_target_subgraph(target_node)
Пример #15
0
    def test_get_largest_subgraph(self, test_input, expected):
        """get_largest_subgraph should match the graph built from the expected edges"""
        reference = StructureModel()
        reference.add_edges_from(expected)

        model = StructureModel()
        model.add_edges_from(test_input)
        result = model.get_largest_subgraph()

        assert set(result.nodes) == set(reference.nodes)
        assert set(result.edges) == set(reference.edges)
Пример #16
0
    def test_add_multiple_edges(self):
        """edges added in separate calls should each keep their own origin"""

        # (target node, origin) for the edges leaving node 1, in insertion order.
        targets_and_origins = [(2, "unknown"), (3, "learned"), (4, "expert")]

        model = StructureModel()
        for target, origin in targets_and_origins:
            model.add_edges_from([(1, target)], origin=origin)

        for target, origin in targets_and_origins:
            assert (1, target, origin) in model.edges.data("origin")
    def test_node_not_in_graph(self, target_node, test_input):
        """get_markov_blanket should raise NodeNotFound for a node missing from the graph"""

        model = StructureModel()
        model.add_edges_from(test_input)
        expected_message = f"Node {target_node} not found in the graph"

        with pytest.raises(NodeNotFound, match=expected_message):
            model.get_markov_blanket(target_node)
    def test_get_markov_blanket_multiple(self, target_nodes, test_input,
                                         expected):
        """get_markov_blanket called with a list of nodes should match the expected graph"""

        reference = StructureModel()
        reference.add_edges_from(expected)

        model = StructureModel()
        model.add_edges_from(test_input)
        result = model.get_markov_blanket(target_nodes)

        assert set(result.nodes) == set(reference.nodes)
        assert set(result.edges) == set(reference.edges)
Пример #19
0
    def test_more_than_one_largest(self):
        """When several subgraphs tie for largest, the first one should be returned"""

        # Two components of equal size: {0, 1, 2} and {3, 4, 5}.
        model = StructureModel()
        model.add_edges_from([(0, 1), (1, 2), (3, 4), (3, 5)])

        reference = StructureModel()
        reference.add_edges_from([(0, 1), (1, 2)])

        result = model.get_largest_subgraph()

        assert set(result.nodes) == set(reference.nodes)
        assert set(result.edges) == set(reference.edges)
Пример #20
0
    def test_get_target_subgraph_twice(self):
        """get_target_subgraph should also work on a subgraph it returned earlier"""
        model = StructureModel()
        model.add_edges_from([(0, 1), (1, 2), (1, 3), (4, 6)])

        # First extraction, then drop the 0 -> 1 edge before extracting again.
        first_pass = model.get_target_subgraph(0)
        first_pass.remove_edge(0, 1)
        second_pass = first_pass.get_target_subgraph(1)

        reference = StructureModel()
        reference.add_edges_from([(1, 2), (1, 3)])

        assert set(second_pass.nodes) == set(reference.nodes)
        assert set(second_pass.edges) == set(reference.edges)
Пример #21
0
    def test_isolates_nodes_and_edges(self):
        """The target subgraph should exclude isolated nodes and unrelated edges"""

        model = StructureModel()
        model.add_edges_from([(0, 1), (1, 2), (1, 3), (5, 6), (4, 5)])
        model.add_nodes_from([7, 8, 9])  # nodes with no edges at all

        reference = StructureModel()
        reference.add_edges_from([(5, 6), (4, 5)])

        result = model.get_target_subgraph(5)

        assert set(result.nodes) == set(reference.nodes)
        assert set(result.edges) == set(reference.edges)
Пример #22
0
    def test_create_inference_with_bad_variable_names_fails(
            self, train_model, train_data_idx):
        """InferenceEngine should raise ValueError when "a" is renamed to "$a" in the network"""

        def _with_dollar(node):
            # Prefix every "a" in the node's string form with "$".
            return str(node).replace("a", "$a")

        renamed_edges = [(_with_dollar(source), _with_dollar(target))
                         for source, target in train_model.edges]
        renamed_model = StructureModel()
        renamed_model.add_edges_from(renamed_edges)

        # Keep the data column in step with the renamed node.
        train_data_idx.rename(columns={"a": "$a"}, inplace=True)

        network = BayesianNetwork(renamed_model).fit_node_states(train_data_idx)
        network.fit_cpds(train_data_idx)

        with pytest.raises(ValueError, match="Variable names must match.*"):
            InferenceEngine(network)
Пример #23
0
    def test_isolates_nodes_and_edges(self):
        """The largest subgraph should exclude isolated nodes and smaller components"""

        model = StructureModel()
        model.add_edges_from([(0, 1), (1, 2), (1, 3), (5, 6)])
        model.add_nodes_from([7, 8, 9])  # nodes with no edges at all

        reference = StructureModel()
        reference.add_edges_from([(0, 1), (1, 2), (1, 3)])

        result = model.get_largest_subgraph()

        assert set(result.nodes) == set(reference.nodes)
        assert set(result.edges) == set(reference.edges)
Пример #24
0
import warnings
from causalnex.structure import StructureModel
from causalnex.plots import plot_structure, NODE_STYLE, EDGE_STYLE
# from IPython.display import Image
# import pygraphviz

# Silence every warning for the rest of the script.
warnings.filterwarnings("ignore")

# Model built from domain knowledge
sm = StructureModel()

# Node pairs linked through domain knowledge, as (source, target)
causal_relationships = [('health', 'absences'), ('health', 'G1')]

# Add the edges to the model
sm.add_edges_from(causal_relationships)

# Visualisation
viz = plot_structure(sm,
                     graph_attributes={"scale": "0.5"},
                     all_node_attributes=NODE_STYLE.WEAK,
                     all_edge_attributes=EDGE_STYLE.WEAK)

# Image(viz.draw(format='png'))

# Save the visualised object to a file
viz.draw('res.png')