import bnlearn as bn


def bayesian_network_datasets(name="asia", samples=10000):
    """Generate well-known sample/toy datasets for Bayesian networks by sampling from an existing graph model.

    Parameters
    ----------
    name : str (default: 'asia')
        Name of the model to sample from.
    samples : int (default: 10000)
        Number of observations in the returned dataset.

    Returns
    -------
    pd.DataFrame
        Sampled observations, one column per node in the DAG.
    """
    # Import the reference DAG (e.g. 'asia', 'sprinkler') and draw samples from it.
    model = bn.import_DAG(name)
    df = bn.sampling(model, n=samples)
    return df
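# Hedged usage sketch for the helper above (not part of the original module); it only
# assumes a working bnlearn installation. The 8 asia columns match the column names
# asserted in the structure-learning test further down.
df_asia = bayesian_network_datasets(name='asia', samples=1000)
print(df_asia.shape)   # expected: (1000, 8)
print(df_asia.head())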
def test_sampling():
    import bnlearn as bn
    import numpy as np
    # TEST 1: sample a random number of observations from the sprinkler DAG and
    # check that the resulting DataFrame has one row per sample and 4 columns.
    model = bn.import_DAG('Sprinkler')
    n = np.random.randint(10, 1000)
    df = bn.sampling(model, n=n)
    assert df.shape == (n, 4)
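# Hedged companion sketch (not in the original test suite): check that sampling the
# sprinkler DAG yields the four expected variables. The exact column names are an
# assumption based on bnlearn's sprinkler example, so the test name and the final
# assertion should be treated as illustrative.
def test_sampling_columns():
    import bnlearn as bn
    model = bn.import_DAG('sprinkler')
    df = bn.sampling(model, n=25)
    assert df.shape == (25, 4)
    assert set(df.columns) == {'Cloudy', 'Sprinkler', 'Rain', 'Wet_Grass'}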
def test_structure_learning():
    import bnlearn as bn
    import numpy as np

    df = bn.import_example()
    model = bn.structure_learning.fit(df)
    assert [*model.keys()] == ['model', 'model_edges', 'adjmat', 'config']
    model = bn.structure_learning.fit(df, methodtype='hc', scoretype='bic')
    assert [*model.keys()] == ['model', 'model_edges', 'adjmat', 'config']
    model = bn.structure_learning.fit(df, methodtype='hc', scoretype='k2')
    assert [*model.keys()] == ['model', 'model_edges', 'adjmat', 'config']
    model = bn.structure_learning.fit(df, methodtype='cs', scoretype='bdeu')
    assert [*model.keys()] == [
        'undirected', 'undirected_edges', 'pdag', 'pdag_edges', 'dag', 'dag_edges',
        'model', 'model_edges', 'adjmat', 'config']
    model = bn.structure_learning.fit(df, methodtype='cs', scoretype='k2')
    assert [*model.keys()] == [
        'undirected', 'undirected_edges', 'pdag', 'pdag_edges', 'dag', 'dag_edges',
        'model', 'model_edges', 'adjmat', 'config']
    model = bn.structure_learning.fit(df, methodtype='ex', scoretype='bdeu')
    assert [*model.keys()] == ['model', 'model_edges', 'adjmat', 'config']
    model = bn.structure_learning.fit(df, methodtype='ex', scoretype='k2')
    assert [*model.keys()] == ['model', 'model_edges', 'adjmat', 'config']
    model = bn.structure_learning.fit(df, methodtype='cl', root_node='Cloudy')
    assert [*model.keys()] == ['model', 'model_edges', 'adjmat', 'config']

    # Test the white/black list filtering
    DAG = bn.import_DAG('asia')
    # Sampling
    df = bn.sampling(DAG, n=1000)
    # Structure learning of the sampled dataset
    model = bn.structure_learning.fit(df)
    assert np.all(model['adjmat'].columns.values == ['smoke', 'bronc', 'lung', 'asia', 'tub', 'either', 'dysp', 'xray'])

    # hc: enforce and filter
    model = bn.structure_learning.fit(df, methodtype='hc', white_list=['smoke', 'either'], bw_list_method='filter')
    assert np.all(model['adjmat'].columns.values == ['smoke', 'either'])
    model = bn.structure_learning.fit(df, methodtype='hc', white_list=['smoke', 'either'], bw_list_method='enforce')
    assert np.all(model['adjmat'].columns.values == ['smoke', 'bronc', 'lung', 'asia', 'tub', 'either', 'dysp', 'xray'])
    model = bn.structure_learning.fit(df, methodtype='hc', black_list=['smoke', 'either'], bw_list_method='filter')
    assert np.all(model['adjmat'].columns.values == ['bronc', 'lung', 'asia', 'tub', 'dysp', 'xray'])
    model = bn.structure_learning.fit(df, methodtype='hc', scoretype='bic', black_list=['smoke', 'either'], bw_list_method='enforce')
    assert np.all(model['adjmat'].columns.values == ['smoke', 'bronc', 'lung', 'asia', 'tub', 'either', 'dysp', 'xray'])

    # ex: filter
    model = bn.structure_learning.fit(df, methodtype='ex', white_list=['smoke', 'either'], bw_list_method='filter')
    assert np.all(model['adjmat'].columns.values == ['either', 'smoke'])
    model = bn.structure_learning.fit(df, methodtype='ex', black_list=['asia', 'tub', 'either', 'dysp', 'xray'], bw_list_method='filter')
    assert np.all(model['adjmat'].columns.values == ['bronc', 'lung', 'smoke'])

    # cs: filter
    model = bn.structure_learning.fit(df, methodtype='cs', white_list=['smoke', 'either'], bw_list_method='filter')
    assert np.all(model['adjmat'].columns.values == ['either', 'smoke'])
    model = bn.structure_learning.fit(df, methodtype='cs', black_list=['asia', 'tub', 'either', 'dysp', 'xray'], bw_list_method='filter')
    assert np.all(model['adjmat'].columns.values == ['bronc', 'smoke', 'lung'])

    # cl: filter
    model = bn.structure_learning.fit(df, methodtype='cl', white_list=['smoke', 'either'], bw_list_method='filter', root_node='smoke')
    assert np.all(model['adjmat'].columns.values == ['smoke', 'either'])
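# Hedged interactive sketch showing how the objects asserted above are typically
# inspected; the dictionary keys ('model_edges', 'adjmat') are taken directly from the
# assertions in test_structure_learning, everything else is illustrative.
def inspect_learned_structure():
    import bnlearn as bn
    df = bn.import_example('sprinkler')
    model = bn.structure_learning.fit(df, methodtype='hc', scoretype='bic')
    print(model['model_edges'])   # learned edges as (source, target) tuples
    print(model['adjmat'])        # adjacency matrix as a pandas DataFrame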
model_ex_bic = bn.structure_learning.fit(df, methodtype='ex', scoretype='bic')
model_ex_k2 = bn.structure_learning.fit(df, methodtype='ex', scoretype='k2')
model_ex_bdeu = bn.structure_learning.fit(df, methodtype='ex', scoretype='bdeu')

bn.compare_networks(model, model_hc_bic, pos=G['pos'])

# %% Example with dataset
import bnlearn as bn

DAG = bn.import_DAG('sprinkler')
# Print the CPDs
bn.print_CPD(DAG)
# Plot the ground truth
G = bn.plot(DAG)
df = bn.sampling(DAG, n=100)

# %% Inference using a custom DAG
import bnlearn as bn

# Load the asia dataset
df = bn.import_example('asia')
# from tabulate import tabulate
# print(tabulate(df.head(), tablefmt="grid", headers="keys"))
print(df)

edges = [('smoke', 'lung'),
         ('smoke', 'bronc'),
         ('lung', 'xray'),
         ('bronc', 'xray')]

# edges = [('smoke', 'xray'),
#          ('bronc', 'lung')]
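# Hedged sketch of the step that presumably follows: turning the edge list above into
# a DAG and learning its CPDs, which is what the inference snippet below expects as
# `DAGnew`. make_DAG and parameter_learning.fit are standard bnlearn calls, but their
# use at exactly this point is an assumption about the omitted part of the example.
DAGnew = bn.make_DAG(edges)
bn.plot(DAGnew)
DAGnew = bn.parameter_learning.fit(DAGnew, df, verbose=0)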
# %%
print(dir(bn.structure_learning))
print(dir(bn.parameter_learning))
print(dir(bn.inference))

# %% The example dataframe sprinkler_data.csv can be loaded with:
df = bn.import_example()
# df = pd.read_csv('sprinkler_data.csv')
model = bn.structure_learning.fit(df)
G = bn.plot(model)

# %% Sample from the sprinkler DAG and learn its structure
DAG = bn.import_DAG('sprinkler', verbose=0)
df = bn.sampling(DAG, n=1000, verbose=0)
# Structure learning
model = bn.structure_learning.fit(df, verbose=0)
# Plot
G = bn.plot(model)
model_hc_bic = bn.structure_learning.fit(df, methodtype='hc', scoretype='bic', verbose=0)

# %% Load the example sprinkler dataframe directly
df = bn.import_example('sprinkler', verbose=0)
# Structure learning
model = bn.structure_learning.fit(df, verbose=0)
# Plot
G = bn.plot(model, verbose=0)
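# Hedged follow-up sketch: a structure learned above has no CPDs yet, so inference
# needs a parameter-learning step first. The sprinkler variable names ('Wet_Grass',
# 'Rain') and the evidence value are assumptions based on the sprinkler example data.
model_update = bn.parameter_learning.fit(model, df, verbose=0)
q = bn.inference.fit(model_update, variables=['Wet_Grass'], evidence={'Rain': 1})
print(q)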
# Print the CPDs
bnlearn.print_CPD(DAGnew)
# Make an inference query on the custom DAG
q4 = bnlearn.inference.fit(DAGnew, variables=['bronc', 'lung'], evidence={'smoke': 1, 'xray': 0})
# q4 = bnlearn.inference.fit(DAGnew, variables=['bronc', 'lung'], evidence={'smoke': 0, 'xray': 0})

# %% Example: compare networks
# Load the asia DAG
import bnlearn
DAG = bnlearn.import_DAG('asia')
# Plot the ground truth
G = bnlearn.plot(DAG)
# Sampling
df = bnlearn.sampling(DAG, n=10000)
# Structure learning of the sampled dataset
model_sl = bnlearn.structure_learning.fit(df, methodtype='hc', scoretype='bic')
# Plot based on structure learning of the sampled data
bnlearn.plot(model_sl, pos=G['pos'])
# Compare networks and make a plot
bnlearn.compare_networks(model, model_sl, pos=G['pos'])

# Structure learning with a white list
model_wl = bnlearn.structure_learning.fit(df, methodtype='hc', white_list=['asia', 'tub', 'bronc', 'xray', 'smoke'])
bnlearn.plot(model_wl, pos=G['pos'])

# Structure learning with a black list
model_bl = bnlearn.structure_learning.fit(df, methodtype='hc', black_list=['asia', 'tub'])
bnlearn.plot(model_bl, pos=G['pos'])
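# Hedged variant sketch: the structure-learning test earlier also passes
# bw_list_method together with white_list/black_list. Based on its assertions,
# 'filter' restricts the learned adjacency matrix to the listed nodes, while
# 'enforce' keeps all nodes; the same keyword can be used here as well.
model_wl_filter = bnlearn.structure_learning.fit(
    df, methodtype='hc',
    white_list=['asia', 'tub', 'bronc', 'xray', 'smoke'],
    bw_list_method='filter')
bnlearn.plot(model_wl_filter)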
        y_distribution_list.append(y_distribution.get(key, 0))

    # Normalize both count lists into probability distributions and return their KL divergence.
    norm_x = [float(i) / sum(x_distribution_list) for i in x_distribution_list]
    norm_y = [float(i) / sum(y_distribution_list) for i in y_distribution_list]
    return entropy(norm_x, qk=norm_y)


# Load the child DAG
dataset = "child"
model = bn.import_DAG(dc[dataset]["path"])
# Plot the ground truth
# G = bn.plot(model)

# Ten sample sizes, log-spaced between 1,000 and 100,000
sample_size = [int(1000 * 100 ** (t / 9)) for t in range(0, 10)]
print(sample_size)

# Sampling
true_df = bn.sampling(model, n=100000)
columns = [col for col in true_df.columns]
print("Get true distribution")
true_distribution = get_distribution(true_df, columns)

kl_divergence = dict()
for size in sample_size:
    train_df = true_df.sample(n=size)
    # Structure learning of the sampled dataset
    model_learned = bn.structure_learning.fit(true_df, methodtype='cs', scoretype='bic', verbose=0)
    # model_learned = bn.structure_learning.fit(train_df, methodtype='cl', scoretype='bic', root_node='either', verbose=0)
    # Parameter learning of the sampled dataset
    model_learned = bn.parameter_learning.fit(model_learned, train_df, verbose=0)
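# Hedged sanity-check sketch for the KL-divergence helper above: scipy.stats.entropy
# with a qk argument returns sum(pk * log(pk / qk)), i.e. the Kullback-Leibler
# divergence between two normalized distributions. The function name and the toy
# numbers are illustrative only.
def _kl_sanity_check():
    import numpy as np
    from scipy.stats import entropy
    p = np.array([0.5, 0.3, 0.2])
    q = np.array([0.4, 0.4, 0.2])
    manual_kl = float(np.sum(p * np.log(p / q)))
    assert np.isclose(entropy(p, qk=q), manual_kl)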