示例#1
0
def bayesian_network_datasets(name="asia", samples=10000):
    """
    Build a toy dataset by sampling from a well known Bayesian network model.

    Parameters
    ----------
    name: str, (default:'asia')
        Identifier of the graph model, passed to ``bn.import_DAG``
    samples: int, (default: 10000)
        Number of observations to draw, passed as ``n`` to ``bn.sampling``

    Returns
    -------
    pd.DataFrame
        The sampled observations as returned by ``bn.sampling``
    """
    # Import the model and sample from it in one go; nothing else is kept.
    return bn.sampling(bn.import_DAG(name), n=samples)
示例#2
0
def test_sampling():
    """Check that sampling the Sprinkler DAG gives one row per sample and 4 columns."""
    sprinkler_dag = bn.import_DAG('Sprinkler')
    # The number of rows is drawn at random so the shape check is not tied
    # to a single hard-coded size.
    n_rows = np.random.randint(10, 1000)
    sampled = bn.sampling(sprinkler_dag, n=n_rows)
    expected_shape = (n_rows, 4)
    assert sampled.shape == expected_shape
示例#3
0
def test_structure_learning():
    """Exercise ``bn.structure_learning.fit`` across method types and list filters.

    Part one checks the keys of the returned model dict for each
    method/score combination on the default example dataframe.  Part two
    samples 1000 rows from the 'asia' DAG and checks which columns remain in
    ``model['adjmat']`` for white/black lists in 'filter' and 'enforce' mode.
    """
    import bnlearn as bn

    # Keys expected from every method; the 'cs' method prepends six more.
    base_keys = ['model', 'model_edges', 'adjmat', 'config']
    cs_keys = [
        'undirected', 'undirected_edges', 'pdag', 'pdag_edges', 'dag',
        'dag_edges',
    ] + base_keys

    df = bn.import_example()
    key_cases = [
        ({}, base_keys),
        ({'methodtype': 'hc', 'scoretype': 'bic'}, base_keys),
        ({'methodtype': 'hc', 'scoretype': 'k2'}, base_keys),
        ({'methodtype': 'cs', 'scoretype': 'bdeu'}, cs_keys),
        ({'methodtype': 'cs', 'scoretype': 'k2'}, cs_keys),
        ({'methodtype': 'ex', 'scoretype': 'bdeu'}, base_keys),
        ({'methodtype': 'ex', 'scoretype': 'k2'}, base_keys),
        ({'methodtype': 'cl', 'root_node': 'Cloudy'}, base_keys),
    ]
    for fit_kwargs, expected_keys in key_cases:
        model = bn.structure_learning.fit(df, **fit_kwargs)
        assert [*model.keys()] == expected_keys

    # Test the filtering on a dataset sampled from the asia DAG.
    asia_dag = bn.import_DAG('asia')
    df = bn.sampling(asia_dag, n=1000)

    all_nodes = [
        'smoke', 'bronc', 'lung', 'asia', 'tub', 'either', 'dysp', 'xray'
    ]
    column_cases = [
        # Plain structure learning keeps every column.
        ({}, all_nodes),
        # hc: enforce and filter, white list then black list.
        ({'methodtype': 'hc',
          'white_list': ['smoke', 'either'],
          'bw_list_method': 'filter'},
         ['smoke', 'either']),
        ({'methodtype': 'hc',
          'white_list': ['smoke', 'either'],
          'bw_list_method': 'enforce'},
         all_nodes),
        ({'methodtype': 'hc',
          'black_list': ['smoke', 'either'],
          'bw_list_method': 'filter'},
         ['bronc', 'lung', 'asia', 'tub', 'dysp', 'xray']),
        ({'methodtype': 'hc',
          'scoretype': 'bic',
          'black_list': ['smoke', 'either'],
          'bw_list_method': 'enforce'},
         all_nodes),
        # ex filter
        ({'methodtype': 'ex',
          'white_list': ['smoke', 'either'],
          'bw_list_method': 'filter'},
         ['either', 'smoke']),
        ({'methodtype': 'ex',
          'black_list': ['asia', 'tub', 'either', 'dysp', 'xray'],
          'bw_list_method': 'filter'},
         ['bronc', 'lung', 'smoke']),
        # cs filter
        ({'methodtype': 'cs',
          'white_list': ['smoke', 'either'],
          'bw_list_method': 'filter'},
         ['either', 'smoke']),
        ({'methodtype': 'cs',
          'black_list': ['asia', 'tub', 'either', 'dysp', 'xray'],
          'bw_list_method': 'filter'},
         ['bronc', 'smoke', 'lung']),
        # cl filter
        ({'methodtype': 'cl',
          'white_list': ['smoke', 'either'],
          'bw_list_method': 'filter',
          'root_node': 'smoke'},
         ['smoke', 'either']),
    ]
    for fit_kwargs, expected_columns in column_cases:
        model = bn.structure_learning.fit(df, **fit_kwargs)
        assert np.all(model['adjmat'].columns.values == expected_columns)
示例#4
0
文件: examples.py 项目: ms440/bnlearn
# Excerpt of an example script: `df`, `model`, `model_hc_bic` and `G` used
# below are defined earlier in the script, outside this excerpt.
# Structure learning with methodtype 'ex', once per score type (bic, k2, bdeu).
model_ex_bic = bn.structure_learning.fit(df, methodtype='ex', scoretype='bic')
model_ex_k2 = bn.structure_learning.fit(df, methodtype='ex', scoretype='k2')
model_ex_bdeu = bn.structure_learning.fit(df,
                                          methodtype='ex',
                                          scoretype='bdeu')

# Compare `model` against the hc/bic result, reusing the node positions
# stored in G['pos'] so both graphs share one layout.
bn.compare_networks(model, model_hc_bic, pos=G['pos'])

# %% Example with dataset
import bnlearn as bn
DAG = bn.import_DAG('sprinkler')
# Print cpds
bn.print_CPD(DAG)
# plot ground truth
G = bn.plot(DAG)
# Draw 100 samples from the sprinkler DAG
df = bn.sampling(DAG, n=100)

# %% Inference using custom DAG
import bnlearn as bn
# Load the asia example dataset (import_example, not import_DAG)
df = bn.import_example('asia')
# from tabulate import tabulate
# print(tabulate(df.head(), tablefmt="grid", headers="keys"))
print(df)

# Edge list (source, target) for the custom DAG
edges = [('smoke', 'lung'), ('smoke', 'bronc'), ('lung', 'xray'),
         ('bronc', 'xray')]

# Alternative edge list, kept for reference:
# edges = [('smoke', 'xray'),
# ('bronc', 'lung')]
示例#5
0
文件: examples.py 项目: khc3/bnlearn
# %%
# List the names available in the three bnlearn submodules.
print(dir(bn.structure_learning))
print(dir(bn.parameter_learning))
print(dir(bn.inference))

# %%
# Example dataframe sprinkler_data.csv can be loaded with:
df = bn.import_example()
# df = pd.read_csv('sprinkler_data.csv')
# Structure learning with default settings, then plot the result
model = bn.structure_learning.fit(df)
G = bn.plot(model)

# %% Sample an example dataframe from the sprinkler DAG
DAG = bn.import_DAG('sprinkler', verbose=0)
df = bn.sampling(DAG, n=1000, verbose=0)

# Structure learning
model = bn.structure_learning.fit(df, verbose=0)
# Plot
G = bn.plot(model)

# Structure learning again with explicit method ('hc') and score ('bic')
model_hc_bic  = bn.structure_learning.fit(df, methodtype='hc', scoretype='bic', verbose=0)

# %% Load example dataframe from sprinkler
df = bn.import_example('sprinkler', verbose=0)
# Structure learning
model = bn.structure_learning.fit(df, verbose=0)
# Plot
G = bn.plot(model, verbose=0)
示例#6
0
# Excerpt of an example script: `DAGnew` is defined earlier, outside this excerpt.
# Print the CPDs
bnlearn.print_CPD(DAGnew)

# Make inference: query 'bronc' and 'lung' given the evidence smoke=1, xray=0
q4 = bnlearn.inference.fit(DAGnew, variables=['bronc','lung'], evidence={'smoke':1, 'xray':0})
# q4 = bnlearn.inference.fit(DAGnew, variables=['bronc','lung'], evidence={'smoke':0, 'xray':0})


# %% Example compare networks
# Load asia DAG
import bnlearn
DAG = bnlearn.import_DAG('asia')
# plot ground truth
G = bnlearn.plot(DAG)
# Sampling
df = bnlearn.sampling(DAG, n=10000)
# Structure learning of sampled dataset
model_sl = bnlearn.structure_learning.fit(df, methodtype='hc', scoretype='bic')
# Plot based on structure learning of sampled data
bnlearn.plot(model_sl, pos=G['pos'])
# Compare networks and make plot
# NOTE(review): `model` is not assigned in this cell; the ground truth loaded
# above is `DAG`. Confirm whether `DAG` was intended as the first argument.
bnlearn.compare_networks(model, model_sl, pos=G['pos'])


# Structure learning with a white list
model_wl = bnlearn.structure_learning.fit(df, methodtype='hc', white_list=['asia','tub','bronc','xray','smoke'])
bnlearn.plot(model_wl, pos=G['pos'])

# Structure learning with a black list
model_bl = bnlearn.structure_learning.fit(df, methodtype='hc', black_list=['asia','tub'])
bnlearn.plot(model_bl, pos=G['pos'])
示例#7
0
        y_distribution_list.append(y_distribution.get(key, 0))
    norm_x = [float(i) / sum(x_distribution_list) for i in x_distribution_list]
    norm_y = [float(i) / sum(y_distribution_list) for i in y_distribution_list]
    return entropy(norm_x, qk=norm_y)


# Load the DAG of the selected dataset ("child"). `dc` is defined outside
# this excerpt; it is indexed by dataset name and then by "path".
dataset = "child"
model = bn.import_DAG(dc[dataset]["path"])

# plot ground truth
# G = bn.plot(model)
# Ten sample sizes, geometrically spaced from 1000 (t=0) to 100000 (t=9)
sample_size = [int(1000 * 100 ** (t / 9)) for t in range(0, 10)]
print(sample_size)
# Sampling: 100000 rows that serve as the reference ("true") data
true_df = bn.sampling(model, n=100000)
columns = [col for col in true_df.columns]
print("Get true distribution")
# `get_distribution` is defined outside this excerpt
true_distribution = get_distribution(true_df, columns)
# Filled per sample size further down (presumably; not visible in this excerpt)
kl_divergence = dict()

for size in sample_size:
    # Draw `size` rows from the reference data as the training set
    train_df = true_df.sample(n=size)

    # Structure learning of sampled dataset
    # NOTE(review): this fits on `true_df`, not on the `train_df` sampled
    # above, although the commented-out alternative below uses `train_df`.
    # Confirm which dataframe is intended.
    model_learned = bn.structure_learning.fit(true_df, methodtype='cs', scoretype='bic', verbose=0)
    # model_learned = bn.structure_learning.fit(train_df, methodtype='cl', scoretype='bic', root_node='either', verbose=0)

    # Parameter learning of sampled dataset
    model_learned = bn.parameter_learning.fit(model_learned, train_df, verbose=0)