def test_pdag_to_dag(self):
    """pdag_to_dag must orient every undirected (two-way) edge while keeping
    the compelled edges and introducing no new v-structures."""
    # C-D and A-D are undirected (both directions present); A->B and C->B
    # are compelled and must survive.
    pdag1 = DirectedGraph([('A', 'B'), ('C', 'B'), ('C', 'D'), ('D', 'C'),
                           ('D', 'A'), ('A', 'D')])
    edges1 = ConstraintBasedEstimator.pdag_to_dag(pdag1).edges()
    self.assertTrue(('A', 'B') in edges1 and
                    ('C', 'B') in edges1 and
                    len(edges1) == 4)

    # The undirected A-D edge admits two equally valid orientations.
    pdag2 = DirectedGraph([('B', 'C'), ('D', 'A'), ('A', 'D'), ('A', 'C')])
    edges2 = set(ConstraintBasedEstimator.pdag_to_dag(pdag2).edges())
    acceptable = ({('B', 'C'), ('A', 'D'), ('A', 'C')},
                  {('B', 'C'), ('D', 'A'), ('A', 'C')})
    self.assertTrue(edges2 in acceptable)

    # Here only one completion avoids a new v-structure at C.
    pdag3 = DirectedGraph([('B', 'C'), ('D', 'C'), ('C', 'D'), ('A', 'C')])
    dag3 = ConstraintBasedEstimator.pdag_to_dag(pdag3)
    self.assertSetEqual({('B', 'C'), ('C', 'D'), ('A', 'C')},
                        set(dag3.edges()))
Example #2
0
def _constraintsearch(df, significance_level=0.05, verbose=3):
    """Constraint-based (PC) structure search.

    test_conditional_independence() returns a triple (chi2, p_value, sufficient_data),
    consisting of the computed chi2 test statistic, the p_value of the test, and a
    heuristic flag that indicates if the sample size was sufficient.
    The p_value is the probability of observing the computed chi2 statistic (or an
    even higher chi2 value), given the null hypothesis that X and Y are independent
    given Zs. This can be used to make independence judgements at a given level of
    significance.

    Parameters
    ----------
    df : pandas.DataFrame
        Observations, one column per variable.
    significance_level : float, optional (default 0.05)
        Threshold for the conditional-independence tests.
    verbose : int, optional (default 3)
        Progress is printed when verbose >= 3 (previously this parameter was
        accepted but ignored; the default preserves the old always-print
        behavior).

    Returns
    -------
    dict
        Keys: 'undirected', 'undirected_edges', 'pdag', 'pdag_edges',
        'dag', 'dag_edges', 'model', 'model_edges'.

    Notes
    -----
    With a method for independence testing at hand, a DAG is constructed from
    the data set in three steps:
        1. Construct an undirected skeleton - `estimate_skeleton()`
        2. Orient compelled edges to obtain a partially directed acyclic graph
           (PDAG; I-equivalence class of DAGs) - `skeleton_to_pdag()`
        3. Extend the DAG pattern to a DAG by conservatively orienting the
           remaining edges in some way - `pdag_to_dag()`
    Steps 1. and 2. form the so-called PC algorithm. PDAGs are `DirectedGraph`s
    that may contain both-way edges, to indicate that the orientation for the
    edge is not determined.

    PC PDAG construction is only guaranteed to work under the assumption that
    the identified set of independencies is *faithful*, i.e. there exists a DAG
    that exactly corresponds to it. Spurious dependencies in the data set can
    cause the reported independencies to violate faithfulness. It can happen
    that the estimated PDAG does not have any faithful completions (i.e. edge
    orientations that do not introduce new v-structures). In that case a
    warning is issued.
    """
    out = dict()
    # Set search algorithm
    model = ConstraintBasedEstimator(df)

    # Step 1: undirected skeleton plus the separating sets found by the
    # conditional-independence tests (needed for orientation in step 2).
    skel, separating_sets = model.estimate_skeleton(
        significance_level=significance_level)
    if verbose >= 3:
        print("Undirected edges: ", skel.edges())

    # Step 2: orient compelled edges (v-structures) to obtain the PDAG.
    pdag = model.skeleton_to_pdag(skel, separating_sets)
    if verbose >= 3:
        print("PDAG edges: ", pdag.edges())

    # Step 3: extend the pattern to a fully directed DAG.
    dag = model.pdag_to_dag(pdag)
    if verbose >= 3:
        print("DAG edges: ", dag.edges())

    out['undirected'] = skel
    out['undirected_edges'] = skel.edges()
    out['pdag'] = pdag
    out['pdag_edges'] = pdag.edges()
    out['dag'] = dag
    out['dag_edges'] = dag.edges()

    # estimate() is a shorthand for the three steps above and directly
    # returns a BayesianModel.
    best_model = model.estimate(significance_level=significance_level)
    out['model'] = best_model
    out['model_edges'] = best_model.edges()

    if verbose >= 3:
        print(best_model.edges())

    return out
Example #3
0
    def test_pdag_to_dag(self):
        """Check that pdag_to_dag extends each PDAG to a consistent DAG."""
        # Two undirected edges (C-D, A-D) plus compelled A->B and C->B.
        pdag1 = DirectedGraph([('A', 'B'), ('C', 'B'), ('C', 'D'), ('D', 'C'),
                               ('D', 'A'), ('A', 'D')])
        edges1 = ConstraintBasedEstimator.pdag_to_dag(pdag1).edges()
        compelled_kept = ('A', 'B') in edges1 and ('C', 'B') in edges1
        self.assertTrue(compelled_kept and len(edges1) == 4)

        # The undirected A-D edge may be oriented either way.
        pdag2 = DirectedGraph([('B', 'C'), ('D', 'A'), ('A', 'D'), ('A', 'C')])
        edges2 = set(ConstraintBasedEstimator.pdag_to_dag(pdag2).edges())
        self.assertTrue(edges2 == {('B', 'C'), ('A', 'D'), ('A', 'C')}
                        or edges2 == {('B', 'C'), ('D', 'A'), ('A', 'C')})

        # A unique faithful completion exists for this pattern.
        pdag3 = DirectedGraph([('B', 'C'), ('D', 'C'), ('C', 'D'), ('A', 'C')])
        dag3 = ConstraintBasedEstimator.pdag_to_dag(pdag3)
        self.assertSetEqual({('B', 'C'), ('C', 'D'), ('A', 'C')},
                            set(dag3.edges()))
    def test_pdag_to_dag(self):
        """Same pdag_to_dag checks, driven by plain networkx DiGraph inputs."""
        # Compelled A->B, C->B; undirected C-D and A-D.
        pdag1 = nx.DiGraph([("A", "B"), ("C", "B"), ("C", "D"), ("D", "C"),
                            ("D", "A"), ("A", "D")])
        result1 = ConstraintBasedEstimator.pdag_to_dag(pdag1).edges()
        keeps_compelled = ("A", "B") in result1 and ("C", "B") in result1
        self.assertTrue(keeps_compelled and len(result1) == 4)

        # Either orientation of the A-D edge is acceptable.
        pdag2 = nx.DiGraph([("B", "C"), ("D", "A"), ("A", "D"), ("A", "C")])
        result2 = set(ConstraintBasedEstimator.pdag_to_dag(pdag2).edges())
        acceptable = ({("B", "C"), ("A", "D"), ("A", "C")},
                      {("B", "C"), ("D", "A"), ("A", "C")})
        self.assertTrue(result2 in acceptable)

        # Exactly one completion avoids a new v-structure.
        pdag3 = nx.DiGraph([("B", "C"), ("D", "C"), ("C", "D"), ("A", "C")])
        result3 = set(ConstraintBasedEstimator.pdag_to_dag(pdag3).edges())
        self.assertSetEqual({("B", "C"), ("C", "D"), ("A", "C")}, result3)
Example #5
0
    def pdag2dag(self, edge_dict):
        """Complete a PDAG given as a parent-dict into a fully directed DAG.

        `edge_dict` maps node -> set of parents; the return value uses the
        same representation, with nodes assumed to be 0..len(edge_dict)-1.
        """
        # Flatten the parent-dict into (parent, child) edge tuples.
        edges = []
        for child, parents in edge_dict.items():
            edges.extend((parent, child) for parent in parents)

        # Let pgmpy orient the remaining undirected edges.
        directed_edges = ConstraintBasedEstimator.pdag_to_dag(DAG(edges)).edges()

        # Rebuild the parent-dict representation from the oriented edges.
        result = {node: set() for node in range(len(edge_dict))}
        for parent, child in directed_edges:
            result[child].add(parent)

        return result
def Constraint_based(dataset: pd.DataFrame):
    """Learn DAG edges from *dataset* with pgmpy's constraint-based (PC) estimator.

    Prints each intermediate structure (skeleton, PDAG, DAG) and returns the
    edges of the final DAG.
    """
    from pgmpy.estimators import ConstraintBasedEstimator

    estimator = ConstraintBasedEstimator(dataset)

    # Step 1: undirected skeleton from conditional-independence tests.
    skeleton, separation_sets = estimator.estimate_skeleton(significance_level=0.01)
    print("Undirected edges:", skeleton.edges())

    # Step 2: orient compelled edges (v-structures) into a PDAG.
    partially_directed = estimator.skeleton_to_pdag(skeleton, separation_sets)
    print("PDAG edges:", partially_directed.edges())

    # Step 3: conservatively orient the remaining edges to obtain a DAG.
    dag = estimator.pdag_to_dag(partially_directed)
    print("DAG edges:", dag.edges())

    return dag.edges()
Example #7
0
    print(cpd)
"""

print()

### Another Method (it will throw errors about sample size - but it still runs and shouldn't be too messed up)
### Constraint Based Structure Learning
# NOTE(review): `train` is presumably the training DataFrame prepared earlier
# in this script - not visible in this chunk; confirm before reuse.
est = ConstraintBasedEstimator(train)

# Step 1: learn the undirected skeleton via conditional-independence tests
# (the separating sets are needed to orient edges in the next step).
skel, seperating_sets = est.estimate_skeleton(significance_level=0.01)
print("Undirected edges: ", skel.edges())

# Step 2: orient compelled edges (v-structures) to get the PDAG.
pdag = est.skeleton_to_pdag(skel, seperating_sets)
print("PDAG edges:       ", pdag.edges())

# Step 3: extend the PDAG to a fully directed DAG.
cb_model = est.pdag_to_dag(pdag)
print("DAG edges:        ", cb_model.edges())

### Parameter learning with MLE on the constraint-based structure
cb_model.fit(train, estimator=MaximumLikelihoodEstimator)

# Notice the significant difference in the connections that this version produces
# Print the final significant edges learned from constraint-based learning
# (est.estimate() reruns all three steps and returns a BayesianModel directly).
print("The edges learned from constraint-based learning are:")
print(est.estimate(significance_level=0.01).edges())

# Print the hill climber's edges
# NOTE(review): `hc_model` comes from a score-based (hill-climbing) search
# elsewhere in this file - confirm it is fitted before predict() is called.
print("The edges learned from score-based learning (hill climbing) are:")
print(hc_model.edges())

Y_pred_hc = hc_model.predict(test)