示例#1
0
def chain_network() -> BayesianNetwork:
    """
    Build a fitted Bayesian network over a five-node chain.

    The chain structure is used to test do-interventions that split the
    graph into subgraphs.

    a → b → c → d → e

    Node states and CPDs are fitted on 50 rows of random binary data, where
    an entry is 1 when a random integer drawn from [0, 10) is greater than 6.
    """
    sample_count = 50
    node_labels = list("abcde")

    # Draw integers and threshold them to obtain a 0/1 matrix.
    draws = np.random.randint(10, size=(sample_count, len(node_labels)))
    frame = pd.DataFrame(data=(draws > 6).astype(int), columns=node_labels)

    structure = StructureModel()
    # Link every node to its successor: a->b, b->c, c->d, d->e.
    structure.add_edges_from(list(zip(node_labels, node_labels[1:])))

    network = BayesianNetwork(structure)
    network = network.fit_node_states(frame)
    return network.fit_cpds(frame,
                            method="BayesianEstimator",
                            bayes_prior="K2")
示例#2
0
    def test_all_nodes_exist(self):
        """Connected and isolated nodes must all appear in the plotted graph"""
        structure = StructureModel([("a", "b")])
        structure.add_node("c")  # isolated node, not part of any edge

        plotted = plot_structure(structure)
        plotted_nodes = plotted.nodes()
        for expected in ("a", "b", "c"):
            assert expected in plotted_nodes
示例#3
0
    def __init__(self, vars: Union[int, List[str]],
                 causal_graph: StructureModel = None,
                 env_type: str = 'Switchboard',
                 state_repeats: int = 1,
                 allow_interventions: bool = True):
        """
        Set up the variable names, the causal model and the bookkeeping state.

        :param vars: either the number of variables, in which case they are
            named 'x0', 'x1', ..., or an explicit list of variable names.
        :param causal_graph: structure model to use as the causal model. When
            it is falsy (e.g. None) a new StructureModel is created with one
            node per variable name and reset_causal_model() is called.
        :param env_type: stored unchanged on the instance.
        :param state_repeats: stored unchanged on the instance.
        :param allow_interventions: stored unchanged on the instance.
        """
        self.allow_interventions = allow_interventions
        self.env_type = env_type

        # isinstance is the idiomatic type check (it also accepts int
        # subclasses, unlike an exact type comparison).
        if isinstance(vars, int):
            self.var_names = ['x' + str(i) for i in range(vars)]
        else:
            self.var_names = vars

        # initialize causal model
        # NOTE(review): this is a truthiness test, so a graph object that
        # evaluates as falsy is replaced by a fresh model - confirm intended.
        if causal_graph:
            self.causal_model = causal_graph
        else:
            self.causal_model = StructureModel()
            # A plain loop, since add_node is called only for its side effect.
            for name in self.var_names:
                self.causal_model.add_node(name)
            self.reset_causal_model()

        # initialize the storages for observational and interventional data.
        self.collected_data = {}

        self.action_space = None
        self.observation_space = None
        self.actions = []
        self.current_action = None
        self.state_repeats = state_repeats
示例#4
0
    def test_f1score_generated(self, adjacency_mat_num_stability):
        """The structure learnt with regularisation should reach a very high F1 score against the ground truth"""
        labels = ["a", "b", "c", "d", "e"]
        df = pd.DataFrame(adjacency_mat_num_stability,
                          columns=labels,
                          index=labels)

        # Ground-truth structure built from the raw adjacency values.
        truth_model = StructureModel(df.values)
        samples = generate_continuous_dataframe(StructureModel(df),
                                                50,
                                                noise_scale=1,
                                                seed=1)
        learnt = from_numpy(samples[labels].values,
                            lasso_beta=0.1,
                            w_threshold=0.25)

        true_edges = truth_model.edges
        predicted_edges = learnt.edges
        hits = len(set(predicted_edges) & set(true_edges))

        # precision: correct / predicted, recall: correct / ground truth
        precision = hits / len(predicted_edges)
        recall = hits / len(true_edges)
        f1_score = 2 * (precision * recall) / (precision + recall)

        assert f1_score > 0.85
示例#5
0
 def test_all_states_included(self):
     """Every state observed for a node should be recorded in node_states"""
     structure = StructureModel()
     structure.add_weighted_edges_from([("a", "b", 1)])

     # Ten rows in which both columns take each of the values 0..9 once.
     frame = pd.DataFrame([[i, i] for i in range(10)], columns=["a", "b"])
     network = BayesianNetwork(structure).fit_node_states(frame)

     states_of_a = network.node_states["a"]
     for value in range(10):
         assert value in states_of_a
示例#6
0
def train_model() -> StructureModel:
    """
    Build the structure shared by all tests; every fixture follows it.

    Cause-only nodes: [d, e]
    Effect-only nodes: [a, c]
    Cause / Effect nodes: [b]

            d
         ↙  ↓  ↘
        a ← b → c
            ↑  ↗
            e
    """
    # Each parent mapped to its children, in the order the edges are added.
    children_of = {
        "b": ["a", "c"],
        "d": ["a", "c", "b"],
        "e": ["c", "b"],
    }
    edges = [
        (parent, child)
        for parent, children in children_of.items()
        for child in children
    ]

    structure = StructureModel()
    structure.add_edges_from(edges)
    return structure
示例#7
0
 def test_fit_with_null_states_raises_error(self):
     """Fitting node states on data that contains nulls should raise a ValueError"""
     structure = StructureModel()
     structure.add_weighted_edges_from([("a", "b", 1)])
     # Single row whose "a" value is missing.
     null_frame = pd.DataFrame([[None, 1]], columns=["a", "b"])
     expected_message = "node '.*' contains None state"

     with pytest.raises(ValueError, match=expected_message):
         BayesianNetwork(structure).fit_node_states(null_frame)
示例#8
0
    def test_intercept(self, distribution, noise_scale):
        """Enabling the intercept should change the mean of the generated data while keeping its standard deviation"""
        graph = StructureModel()
        graph.add_node("123")

        # Arguments shared by both runs; only `intercept` differs.
        shared = dict(
            n_samples=100000,
            distribution=distribution,
            noise_scale=noise_scale,
            seed=10,
        )
        without_intercept = generate_continuous_data(graph,
                                                     intercept=False,
                                                     **shared)
        with_intercept = generate_continuous_data(graph,
                                                  intercept=True,
                                                  **shared)

        column_plain = without_intercept[:, 0]
        column_shifted = with_intercept[:, 0]

        means_match = np.isclose(column_plain.mean(), column_shifted.mean())
        assert not means_match
        # The spread may differ by at most 1% (relative).
        assert np.isclose(column_plain.std(), column_shifted.std(), rtol=0.01)
示例#9
0
def make_default_scm(radiomic_features=None):
    """
    Build the default structure model over the clinical variables.

    Without radiomic features, only the clinical edges are added. When
    ``radiomic_features`` is given, 'Age' additionally points to
    'ovarian_status' and to every radiomic feature, and every radiomic
    feature points to 'response'.

    :param radiomic_features: optional collection of radiomic feature names.
    :return: a StructureModel containing the edges described above.
    """
    # Edges present in both variants, keyed by parent node.
    clinical_edges = {
        'biopsy_grade': ['response'],
        'subtypes': ['response'],
        'histology': ['response'],
        'clinical_nodal_status': ['stage'],
        'stage': ['response'],
    }

    if radiomic_features is None:
        children_by_parent = clinical_edges
    else:
        # Later entries override earlier ones with the same key, while the
        # position of the first occurrence is kept.
        children_by_parent = {
            'Age': ['ovarian_status', *radiomic_features],
            **clinical_edges,
            **{feature: ['response'] for feature in radiomic_features},
        }

    # Flatten the mapping into (parent, child) pairs.
    edge_pairs = []
    for parent, children in children_by_parent.items():
        for child in children:
            edge_pairs.append((parent, child))

    sm = StructureModel()
    sm.add_edges_from(edge_pairs)
    return sm
示例#10
0
    def test_intercept(self, distribution, n_categories, noise_scale):
        """With an intercept, fewer than half of the category means should stay close to the no-intercept means"""
        graph = StructureModel()
        graph.add_node("A")

        def draw(intercept):
            # Identical settings for both runs; only `intercept` varies.
            return generate_categorical_dataframe(
                graph,
                100000,
                distribution,
                noise_scale=noise_scale,
                n_categories=n_categories,
                seed=10,
                intercept=intercept,
            )

        without_intercept = draw(False)
        with_intercept = draw(True)

        # NOTE: the more categories there are, the closer to 1.0 the chance
        # that at least one category has the same mean with intercept=True
        # as with intercept=False, so only a majority is required to differ.
        close_mask = np.isclose(with_intercept.mean(axis=0),
                                without_intercept.mean(axis=0),
                                atol=0.05,
                                rtol=0)
        assert close_mask.sum() < n_categories / 2
示例#11
0
    def test_add_weighted_edges_from_other(self):
        """Adding weighted edges with an unrecognised origin should raise a ValueError"""
        structure = StructureModel()
        weighted_edges = [(1, 2, 0.5)]
        expected_message = "^Unknown origin: must be one of.*$"

        with pytest.raises(ValueError, match=expected_message):
            structure.add_weighted_edges_from(weighted_edges, origin="other")
    def test_isolates(self):
        """get_largest_subgraph should return None when the model holds only isolated nodes"""
        structure = StructureModel()
        structure.add_nodes_from([1, 3, 5, 2, 7])  # nodes only, no edges

        largest = structure.get_largest_subgraph()
        assert largest is None
示例#13
0
    def test_number_of_nodes(self, num_nodes):
        """Every generated sample should contain one entry per node"""
        # Chain 0 -> 1 -> ... -> num_nodes - 1, each edge with weight 1.
        chain_edges = [(src, src + 1, 1) for src in range(num_nodes - 1)]
        graph = StructureModel()
        graph.add_weighted_edges_from(chain_edges)

        samples = generate_binary_data(graph, 100, seed=10)
        for row in samples:
            assert len(row) == num_nodes
示例#14
0
    def test_add_edge_unknown(self):
        """An edge added with the unknown origin should carry the unknown origin label"""
        source, target = 1, 2
        structure = StructureModel()
        structure.add_edge(source, target, "unknown")

        assert (source, target) in structure.edges
        labelled_edges = structure.edges.data("origin")
        assert (source, target, "unknown") in labelled_edges
示例#15
0
 def test_get_indices_empty_iterator(self, schema):
     """get_indices should return an empty list for the predecessors of a parentless node"""
     graph = StructureModel()
     graph.add_node(10)  # lone node, so it has no parents

     feature_mapper = VariableFeatureMapper(schema)
     parents = graph.predecessors(10)
     indices = feature_mapper.get_indices(parents)

     assert len(indices) == 0
     assert isinstance(indices, list)
示例#16
0
    def test_add_edge_custom_attr(self):
        """An edge can be added together with a custom attribute"""
        source, target = 1, 2
        structure = StructureModel()
        structure.add_edge(source, target, x="Y")

        assert (source, target) in structure.edges
        custom_values = structure.edges.data("x")
        assert (source, target, "Y") in custom_values
示例#17
0
    def test_instance(self):
        """get_target_subgraph should hand back a StructureModel instance"""
        edge_list = [(0, 1), (1, 2), (1, 3), (4, 6)]
        structure = StructureModel()
        structure.add_edges_from(edge_list)

        target_subgraph = structure.get_target_subgraph(2)
        assert isinstance(target_subgraph, StructureModel)
示例#18
0
    def test_add_edge_expert(self):
        """An edge added with the expert origin should carry the expert origin label"""
        source, target = 1, 2
        structure = StructureModel()
        structure.add_edge(source, target, "expert")

        assert (source, target) in structure.edges
        labelled_edges = structure.edges.data("origin")
        assert (source, target, "expert") in labelled_edges
示例#19
0
    def test_add_edge_learned(self):
        """An edge added with the learned origin should carry the learned origin label"""
        source, target = 1, 2
        structure = StructureModel()
        structure.add_edge(source, target, "learned")

        assert (source, target) in structure.edges
        labelled_edges = structure.edges.data("origin")
        assert (source, target, "learned") in labelled_edges
示例#20
0
    def test_add_edge_default(self):
        """An edge added without an explicit origin should be labelled with the unknown origin"""
        source, target = 1, 2
        structure = StructureModel()
        structure.add_edge(source, target)  # origin deliberately omitted

        assert (source, target) in structure.edges
        labelled_edges = structure.edges.data("origin")
        assert (source, target, "unknown") in labelled_edges
示例#21
0
    def test_add_weighted_edges_from_custom_attr(self):
        """Weighted edges can be added together with a custom attribute"""
        weighted_edges = [(1, 2, 0.5), (2, 3, 0.5)]
        structure = StructureModel()
        structure.add_weighted_edges_from(weighted_edges, x="Y")

        weights = structure.edges.data("weight")
        for source, target, weight in weighted_edges:
            assert (source, target, weight) in weights

        custom_values = structure.edges.data("x")
        for source, target, _ in weighted_edges:
            assert (source, target, "Y") in custom_values
示例#22
0
    def test_add_weighted_edges_from_expert(self):
        """Weighted edges added with the expert origin should carry the expert origin label"""
        weighted_edges = [(1, 2, 0.5), (2, 3, 0.5)]
        structure = StructureModel()
        structure.add_weighted_edges_from(weighted_edges, origin="expert")

        weights = structure.edges.data("weight")
        for source, target, weight in weighted_edges:
            assert (source, target, weight) in weights

        origins = structure.edges.data("origin")
        for source, target, _ in weighted_edges:
            assert (source, target, "expert") in origins
示例#23
0
    def test_add_weighted_edges_from_default(self):
        """Weighted edges added without an explicit origin should be labelled with the unknown origin"""
        weighted_edges = [(1, 2, 0.5), (2, 3, 0.5)]
        structure = StructureModel()
        structure.add_weighted_edges_from(weighted_edges)  # origin omitted

        weights = structure.edges.data("weight")
        for source, target, weight in weighted_edges:
            assert (source, target, weight) in weights

        origins = structure.edges.data("origin")
        for source, target, _ in weighted_edges:
            assert (source, target, "unknown") in origins
示例#24
0
    def test_add_edges_from_custom_attr(self):
        """Several edges can be added together with a custom attribute"""
        edge_pairs = [(1, 2), (2, 3)]
        structure = StructureModel()
        structure.add_edges_from(edge_pairs, x="Y")

        for pair in edge_pairs:
            assert pair in structure.edges

        custom_values = structure.edges.data("x")
        for source, target in edge_pairs:
            assert (source, target, "Y") in custom_values
示例#25
0
    def test_add_edges_from_expert(self):
        """Several edges added with the expert origin should carry the expert origin label"""
        edge_pairs = [(1, 2), (2, 3)]
        structure = StructureModel()
        structure.add_edges_from(edge_pairs, "expert")

        for pair in edge_pairs:
            assert pair in structure.edges

        origins = structure.edges.data("origin")
        for source, target in edge_pairs:
            assert (source, target, "expert") in origins
示例#26
0
 def test_zero_lambda(self):
     """
     Counts generated for nodes without parents must not be all zero, which
     a wrong initialisation could cause.
     """
     parentless_nodes = list(range(20))
     graph = StructureModel()
     graph.add_nodes_from(parentless_nodes)

     column_means = generate_count_dataframe(graph, 10000).mean()
     has_zero_mean = np.any(column_means == 0)
     assert not has_zero_mean
示例#27
0
    def test_to_undirected(self):
        """to_undirected should produce an undirected graph"""
        structure = StructureModel()
        # (1, 2) and (2, 1) are the same edge once direction is dropped.
        structure.add_edges_from([(1, 2), (2, 1), (2, 3), (3, 4)])

        undirected = structure.to_undirected()
        for pair in [(2, 3), (3, 4)]:
            assert pair in undirected.edges
        assert (1, 2) in undirected.edges or (2, 1) in undirected.edges
        assert len(undirected.edges) == 3
示例#28
0
    def test_to_directed(self):
        """to_directed should return a StructureModel that keeps every edge"""
        edge_pairs = [(1, 2), (2, 1), (2, 3), (3, 4)]
        structure = StructureModel()
        structure.add_edges_from(edge_pairs)

        directed = structure.to_directed()
        assert isinstance(directed, StructureModel)
        for pair in edge_pairs:
            assert pair in directed.edges
示例#29
0
    def test_number_of_columns(self, num_nodes, n_categories):
        """The generated dataframe should have num_nodes * n_categories columns"""
        # Chain 0 -> 1 -> ... -> num_nodes - 1, each edge with weight 1.
        chain_edges = [(src, src + 1, 1) for src in range(num_nodes - 1)]
        graph = StructureModel()
        graph.add_weighted_edges_from(chain_edges)

        frame = generate_categorical_dataframe(
            graph, 100, seed=10, n_categories=n_categories
        )
        expected_columns = num_nodes * n_categories
        assert frame.shape[1] == expected_columns
示例#30
0
    def test_fit_with_missing_feature_in_data(self):
        """Fitting node states on data that lacks a network feature should raise a KeyError"""
        structure = StructureModel()
        structure.add_weighted_edges_from([("a", "e", 1)])

        # The frame has no "e" column although the network contains node "e".
        incomplete_frame = pd.DataFrame(
            [[1, 1, 1, 1]], columns=["a", "b", "c", "d"]
        )
        expected_message = (
            "The data does not cover all the features found in the Bayesian Network. "
            "Please check the following features: {'e'}"
        )

        with pytest.raises(KeyError, match=expected_message):
            BayesianNetwork(structure).fit_node_states(incomplete_frame)