Example #1
0
def _constraintsearch(df, significance_level=0.05, verbose=3):
    """Constraint-based structure search (PC algorithm).

    test_conditional_independence() returns a triple (chi2, p_value, sufficient_data),
    consisting of the computed chi2 test statistic, the p_value of the test, and a
    heuristic flag that indicates whether the sample size was sufficient.
    The p_value is the probability of observing the computed chi2 statistic (or an
    even higher chi2 value), given the null hypothesis that X and Y are independent
    given Zs. This can be used to make independence judgements, at a given level of
    significance.

    Parameters
    ----------
    df : pandas.DataFrame
        Data set to learn the structure from.
    significance_level : float, default 0.05
        Significance level used by the chi2 conditional-independence tests.
    verbose : int, default 3
        Verbosity; progress messages are printed when >= 3. (Fixes the original,
        which accepted ``verbose`` but never consulted it.)

    Returns
    -------
    dict
        The skeleton, PDAG, DAG and final model, each paired with its edge list.
    """
    out = dict()
    # Set search algorithm
    model = ConstraintBasedEstimator(df)

    # Some checks for dependency
    #    print(_is_independent(est, 'Sprinkler', 'Rain', significance_level=significance_level))
    #    print(_is_independent(est, 'Cloudy', 'Rain', significance_level=significance_level))
    #    print(_is_independent(est, 'Sprinkler', 'Rain',  ['Wet_Grass'], significance_level=significance_level))

    # DAG (pattern) construction.
    # With a method for independence testing at hand, a DAG is constructed from the
    # data set in three steps:
    #     1. Construct an undirected skeleton - `estimate_skeleton()`
    #     2. Orient compelled edges to obtain a partially directed acyclic graph
    #        (PDAG; I-equivalence class of DAGs) - `skeleton_to_pdag()`
    #     3. Extend the DAG pattern to a DAG by conservatively orienting the
    #        remaining edges in some way - `pdag_to_dag()`
    # Steps 1 & 2 form the so-called PC algorithm, see [2], page 550. PDAGs are
    # `DirectedGraph`s that may contain both-way edges, to indicate that the
    # orientation for the edge is not determined.

    # Step 1: estimate the undirected skeleton using chi2 independence tests.
    skel, separating_sets = model.estimate_skeleton(significance_level=significance_level)
    if verbose >= 3:
        print("Undirected edges: ", skel.edges())

    # Step 2: orient compelled edges, yielding a PDAG.
    pdag = model.skeleton_to_pdag(skel, separating_sets)
    if verbose >= 3:
        print("PDAG edges: ", pdag.edges())

    # Step 3: conservatively extend the PDAG to a fully directed DAG.
    dag = model.pdag_to_dag(pdag)
    if verbose >= 3:
        print("DAG edges: ", dag.edges())

    out['undirected'] = skel
    out['undirected_edges'] = skel.edges()
    out['pdag'] = pdag
    out['pdag_edges'] = pdag.edges()
    out['dag'] = dag
    out['dag_edges'] = dag.edges()

    # "estimate()" is a shorthand for the three steps above and directly returns a
    # "BayesianModel".
    best_model = model.estimate(significance_level=significance_level)
    out['model'] = best_model
    out['model_edges'] = best_model.edges()

    if verbose >= 3:
        print(best_model.edges())

    # NOTE: PC PDAG construction is only guaranteed to work under the assumption
    # that the identified set of independencies is *faithful*, i.e. there exists a
    # DAG that exactly corresponds to it. Spurious dependencies in the data set can
    # cause the reported independencies to violate faithfulness. It can happen that
    # the estimated PDAG does not have any faithful completions (i.e. edge
    # orientations that do not introduce new v-structures). In that case a warning
    # is issued.
    return out
Example #2
0
def pc(mat):
    """Learn a DAG from *mat* with the PC algorithm and return its dense adjacency matrix."""
    frame = pd.DataFrame(mat)
    estimator = ConstraintBasedEstimator(frame)
    learned = estimator.estimate()

    # kinda hacky, but can't find a more direct way
    # of getting the adj matrix
    digraph = nx.DiGraph()
    digraph.add_nodes_from(learned.nodes())
    digraph.add_edges_from(learned.edges())

    # specify nodelist to maintain ordering
    # consistent with dataframe
    # TODO this is a non-weighted adjacency matrix,
    # but according to the paper, it might need to be? given
    # their signs are being checked
    return nx.adjacency_matrix(digraph, nodelist=frame.columns).todense()
Example #3
0
def constraintStructureLearn(data, significance_level=0.01):
    """Learn a Bayesian-network structure from conditional-independence constraints."""
    # Build the network using constraint-based (PC) estimation.
    estimator = ConstraintBasedEstimator(data)
    return estimator.estimate(significance_level)
Example #4
0
# Constraint-based (PC) structure learning, performed step by step with `est`
# (a ConstraintBasedEstimator constructed elsewhere in the file).
# Step 1: estimate the undirected skeleton via chi2 independence tests.
skel, seperating_sets = est.estimate_skeleton(significance_level=0.01)
print("Undirected edges: ", skel.edges())

# Step 2: orient compelled edges to obtain the PDAG (I-equivalence class).
pdag = est.skeleton_to_pdag(skel, seperating_sets)
print("PDAG edges:       ", pdag.edges())

# Step 3: conservatively extend the PDAG to a fully directed DAG.
cb_model = est.pdag_to_dag(pdag)
print("DAG edges:        ", cb_model.edges())

### Parameter learning with MLE
cb_model.fit(train, estimator=MaximumLikelihoodEstimator)

#Notice the significant difference in the connections that this version produces
#Print the final significant edges learned from constraint-based learning
print("The edges learned from constraint-based learning are:")
print(est.estimate(significance_level=0.01).edges())

#Print the hill climber's edges
# NOTE(review): `hc_model` is defined elsewhere in the file — presumably a
# hill-climbing (score-based) model fitted on the same data; confirm.
print("The edges learned from score-based learning (hill climbing) are:")
print(hc_model.edges())

# Predict the target on the held-out set with both learned models.
Y_pred_hc = hc_model.predict(test)
Y_pred_cb = cb_model.predict(test)

# Flatten predictions and encode labels as integers for the sklearn metrics.
Y_pred_hc = labelencoder.fit_transform(Y_pred_hc.values.ravel())
Y_pred_cb = labelencoder.fit_transform(Y_pred_cb.values.ravel())

# Output results {'Accuracy': 0.9708029197080292, 'Precision': 0.9423076923076923, 'F1 Score': 0.9607843137254902}
# Score the hill-climbing model against the true labels.
accuracy_hc = accuracy_score(Y_test, Y_pred_hc)
precision_hc = precision_score(Y_test, Y_pred_hc)
f1_hc = f1_score(Y_test, Y_pred_hc)
Example #5
0
 # Subsample the data (columns were chosen earlier as `random_columns`), then
 # learn the network structure and parameters on the reduced frame.
 data = data.iloc[:, random_columns]
 #Delete invoices with all zeros from the data
 data = data[(data.T != 0).any()]
 row_size = data.shape[0]
 # Draw 2000 random rows to keep structure learning tractable.
 random_indices = sample(range(row_size), 2000)
 smallDF = data.iloc[random_indices, :]
 smallDF.shape
 PseudoCounts = {}
 #Pseudocounts are given (1,1) for uniform
 for productName in smallDF.columns:
     PseudoCounts[productName] = [1, 1]
 print('Existing network not found')
 # Constraint-based (PC) structure learning on the subsample.
 est = ConstraintBasedEstimator(smallDF)
 print('Starting to estimate the model structure, might take a while...')
 start = time.time()
 model = est.estimate(significance_level=0.05)
 end = time.time()
 print('Time spent to estimate model structure {0}'.format(end - start))
 print('Edges of the model:')
 print(model.edges())
 print('Starting to estimate model parameters..')
 start = time.time()
 # Bayesian parameter estimation with uniform Dirichlet priors (the (1,1)
 # pseudo-counts built above).
 model.fit(smallDF,
           estimator=BayesianEstimator,
           prior_type='dirichlet',
           pseudo_counts=PseudoCounts)
 end = time.time()
 print('Time spent to estimete the model parameters {0}'.format(end -
                                                                start))
 #Save edge ,node, CPD information
 Edges = model.edges()