Exemplo n.º 1
0
def load_total_cpds():
    """Attach BDeu-estimated CPDs to every node of ``total_G``.

    A 100-row DataFrame of random 0/1 values (one column per graph node) is
    used as training data. CPDs are added to ``total_G`` in three passes:
    all nodes at once via ``get_parameters``, then the sub-symptom and
    symptom nodes one by one, and finally the condition nodes one by one.
    """
    # All the nodes in the graph (157 nodes)
    node_names = total_G.nodes

    random_samples = np.random.randint(low=0,
                                       high=2,
                                       size=(100, len(node_names)))
    frame = pd.DataFrame(random_samples, columns=node_names)

    # Option 1 of fitting cpds: estimate all of them in a single call.
    estimator = BayesianEstimator(total_G, frame)
    all_cpds = estimator.get_parameters(prior_type='BDeu',
                                        equivalent_sample_size=5)
    for cpd in all_cpds:
        total_G.add_cpds(cpd)

    # Option 2 of fitting cpds: estimate node by node.
    for idx in range(1, num_sub_symptoms + 1):
        total_G.add_cpds(
            estimator.estimate_cpd('sub_sympt_' + str(idx),
                                   prior_type="BDeu"))
        # Symptom nodes only exist for the first ``num_symptoms`` indices.
        if idx > num_symptoms:
            continue
        total_G.add_cpds(
            estimator.estimate_cpd('sympt_' + str(idx), prior_type="BDeu"))

    # this is the time cruncher.
    for idx in range(1, num_conditions + 1):
        total_G.add_cpds(
            estimator.estimate_cpd('cond_' + str(idx), prior_type="BDeu"))
Exemplo n.º 2
0
class TimeBayesianEstimator:
    """Timing cases for ``BayesianEstimator.get_parameters`` on two example models."""

    timeout = 1200

    def setup(self):
        """Prepare estimators for the 'alarm' and 'munin1' example models.

        For each example model, a ``BayesianNetwork`` is built from its edges
        only and paired with 10,000 samples simulated from the example model.
        """
        n_samples = int(1e4)

        self.alarm = get_example_model('alarm')
        self.alarm_model = BayesianNetwork(self.alarm.edges())
        alarm_samples = self.alarm.simulate(n_samples, show_progress=False)
        self.alarm_est = BayesianEstimator(self.alarm_model, alarm_samples)

        self.munin = get_example_model('munin1')
        self.munin_model = BayesianNetwork(self.munin.edges())
        munin_samples = self.munin.simulate(n_samples, show_progress=False)
        self.munin_est = BayesianEstimator(self.munin_model, munin_samples)

    def time_alarm_bayesian_estimator(self):
        """Estimate all parameters of the 'alarm' structure."""
        self.alarm_est.get_parameters()

    def time_munin_bayesian_estimator(self):
        """Estimate all parameters of the 'munin1' structure."""
        self.munin_est.get_parameters()
# Load the comma-separated activity data into a NumPy array.
csv_path = r'E:\Lessons_tutorials\Behavioural user profile articles\Datasets\7 twor.2009\twor.2009\converted\pgmpy\activities+time_ordered_withoutdatetime.csv'
mydata = np.genfromtxt(csv_path, delimiter=",")
# Alternative loader that was tried previously:
#pd.read_csv(r'E:\Lessons_tutorials\Behavioural user profile articles\Datasets\7 twor.2009\twor.2009\converted\pgmpy\data.csv')

# Wrap the array in a DataFrame whose columns are named by feature_names.
data = pd.DataFrame(mydata, columns=feature_names)
print(data)

# Scores to try with hill climbing; only BIC is active
# (BdeuScore(data) and K2Score(data) are disabled).
list_of_scoring_methods = [BicScore(data)]

for scoreMethod in list_of_scoring_methods:
    # The timed region covers the search and the two prints that follow it.
    start_time = time.time()
    hc = HillClimbSearch(data, scoreMethod)
    best_model = hc.estimate()
    print(hc.scoring_method)
    print(best_model.edges())
    end_time = time.time()
    print("execution time in seconds:", end_time - start_time, sep="\n")

# Learn parameters for the structure found by the last search, using a K2 prior.
estimator = BayesianEstimator(best_model, data)
k2_parameters = estimator.get_parameters(prior_type='K2')
print(k2_parameters)

#casas7_model = BayesianModel()
#casas7_model.fit(data, estimator=BayesianEstimator)#MaximumLikelihoodEstimator)
#print(casas7_model.get_cpds())
#casas7_model.get_n
Exemplo n.º 4
0
# Run the structure search and report the edges it found.
# NOTE(review): `est` is created before this excerpt begins — confirm its type.
best_model = est.estimate()
print("Structure found!")
print("Best Model")
print(best_model.edges())
'''
Parameters Estimation and CPD tables
'''
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator

# Rebuild a model from the learned edges and load the training data.
model = BayesianModel(best_model.edges())
data = pd.read_csv("final_training_set.csv")

# Estimate one CPD per node with a BDeu prior (equivalent sample size 10).
estimator = BayesianEstimator(model, data)
parameters = estimator.get_parameters(prior_type='BDeu',
                                      equivalent_sample_size=10)

# Attach every estimated CPD to the model.
for cpd in parameters:
    model.add_cpds(cpd)
'''
Inference and Validation
'''
from pgmpy.inference import VariableElimination
import csv

# NOTE(review): this file handle is not closed anywhere in the visible code;
# confirm that the code consuming `reader` closes it.
f = open("validation_data.csv")
reader = csv.reader(f)

# Inference engine over the fitted model, plus two counters initialised to
# zero (presumably tallies of validation outcomes — their use is not visible here).
inference = VariableElimination(model)
valid = 0
invalid = 0
Exemplo n.º 5
0

# Directory and file name of the CSV to load.
path = r'C:\Users\Javier\Documents\MEGA\Universitattt\Master\Thesis\CDS_data\toy_data'
file1 = r'epsilon_csv.csv'

# Read the CSV using its first column as the index and skipping rows 19 and 20,
# then transpose the resulting table.
raw_table = pd.read_csv(os.path.join(path, file1),
                        index_col=0,
                        skiprows=[19, 20])
data = raw_table.transpose()

print('Using BicScore Method')

# Structure learning: hill-climb search scored with BIC.
bic_score = BicScore(data)
est = HillClimbSearch(data, scoring_method=bic_score)
best_model = est.estimate()

# A MaximumLikelihoodEstimator variant is disabled here:
# MLE_estimator = MaximumLikelihoodEstimator(best_model, data)
# MLE_parameters = MLE_estimator.get_parameters()

# Parameter learning with the Bayesian estimator's default arguments.
bay_estimator = BayesianEstimator(best_model, data)
bay_parameters = bay_estimator.get_parameters()

print('Edges:')
print(best_model.edges())

# print('MLE Parameters')
# for m in MLE_parameters:
#     print(m)

print('Bayesian Parameters')
for b in bay_parameters:
    print(b)

class TestBayesianEstimator(unittest.TestCase):
    """Tests for ``BayesianEstimator`` on a small model with edges A -> C and B -> C."""

    def setUp(self):
        """Create the model, two data sets and three estimators shared by the tests."""
        self.m1 = BayesianModel([('A', 'C'), ('B', 'C')])
        self.d1 = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
        self.d2 = pd.DataFrame(data={'A': [0, 0, 1, 0, 2, 0, 2, 1, 0, 2],
                                     'B': ['X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y'],
                                     'C': [1, 1, 1, 0, 0, 0, 0, 0, 0, 0]})
        self.est1 = BayesianEstimator(self.m1, self.d1)
        # est2 declares states ('A': 2, 'C': 23) that never occur in d1.
        self.est2 = BayesianEstimator(self.m1, self.d1, state_names={'A': [0, 1, 2],
                                                                     'B': [0, 1],
                                                                     'C': [0, 1, 23]})
        self.est3 = BayesianEstimator(self.m1, self.d2)

    def test_estimate_cpd_dirichlet(self):
        """Dirichlet prior: explicit pseudo counts are combined with the data counts."""
        cpd_A = self.est1.estimate_cpd('A',  prior_type="dirichlet", pseudo_counts=[0, 1])
        self.assertEqual(cpd_A, TabularCPD('A', 2, [[0.5], [0.5]]))

        cpd_B = self.est1.estimate_cpd('B',  prior_type="dirichlet", pseudo_counts=[9, 3])
        self.assertEqual(cpd_B, TabularCPD('B', 2, [[11.0/15], [4.0/15]]))

        cpd_C = self.est1.estimate_cpd('C',  prior_type="dirichlet", pseudo_counts=[0.4, 0.6])
        self.assertEqual(cpd_C, TabularCPD('C', 2, [[0.2, 0.2, 0.7, 0.4],
                                                    [0.8, 0.8, 0.3, 0.6]],
                                           evidence=['A', 'B'], evidence_card=[2, 2]))

    def test_estimate_cpd_improper_prior(self):
        """All-zero pseudo counts give NaN for the parent configuration absent from d1."""
        cpd_C = self.est1.estimate_cpd('C',  prior_type="dirichlet", pseudo_counts=[0, 0])
        # np.nan is used instead of np.NaN: the latter alias was removed in NumPy 2.0.
        cpd_C_correct = (TabularCPD('C', 2, [[0.0, 0.0, 1.0, np.nan],
                                             [1.0, 1.0, 0.0, np.nan]],
                                    evidence=['A', 'B'], evidence_card=[2, 2],
                                    state_names={'A': [0, 1], 'B': [0, 1], 'C': [0, 1]}))
        # manual comparison because np.nan != np.nan
        self.assertTrue(((cpd_C.values == cpd_C_correct.values) |
                         (np.isnan(cpd_C.values) & np.isnan(cpd_C_correct.values))).all())

    def test_estimate_cpd_shortcuts(self):
        """The 'BDeu' and 'K2' prior types produce the expected CPDs."""
        cpd_C1 = self.est2.estimate_cpd('C',  prior_type='BDeu', equivalent_sample_size=9)
        cpd_C1_correct = TabularCPD('C', 3, [[0.2, 0.2, 0.6, 1./3, 1./3, 1./3],
                                             [0.6, 0.6, 0.2, 1./3, 1./3, 1./3],
                                             [0.2, 0.2, 0.2, 1./3, 1./3, 1./3]],
                                    evidence=['A', 'B'], evidence_card=[3, 2])
        self.assertEqual(cpd_C1, cpd_C1_correct)

        cpd_C2 = self.est3.estimate_cpd('C',  prior_type='K2')
        cpd_C2_correct = TabularCPD('C', 2, [[0.5, 0.6, 1./3, 2./3, 0.75, 2./3],
                                             [0.5, 0.4, 2./3, 1./3, 0.25, 1./3]],
                                    evidence=['A', 'B'], evidence_card=[3, 2])
        self.assertEqual(cpd_C2, cpd_C2_correct)

    def test_get_parameters(self):
        """``get_parameters()`` matches calling ``estimate_cpd`` for each node."""
        cpds = {self.est3.estimate_cpd('A'),
                self.est3.estimate_cpd('B'),
                self.est3.estimate_cpd('C')}
        self.assertSetEqual(set(self.est3.get_parameters()), cpds)

    def test_get_parameters2(self):
        """``get_parameters`` accepts a per-node dict of dirichlet pseudo counts."""
        pseudo_counts = {'A': [1, 2, 3], 'B': [4, 5], 'C': [6, 7]}
        cpds = {self.est3.estimate_cpd('A', prior_type="dirichlet", pseudo_counts=pseudo_counts['A']),
                self.est3.estimate_cpd('B', prior_type="dirichlet", pseudo_counts=pseudo_counts['B']),
                self.est3.estimate_cpd('C', prior_type="dirichlet", pseudo_counts=pseudo_counts['C'])}
        self.assertSetEqual(set(self.est3.get_parameters(prior_type="dirichlet",
                                                         pseudo_counts=pseudo_counts)), cpds)

    def tearDown(self):
        """Drop every fixture created in ``setUp``."""
        del self.m1
        del self.d1
        del self.d2
        del self.est1
        del self.est2
        del self.est3
]

prior_type = ["dirichlet", "BDeu", "K2"]

# Index 0 ("dirichlet") is skipped: only the BDeu and K2 priors are run.
for i in range(1, 3):
    print(prior_type[i])
    model = BayesianModel(structures[i])
    estimator = BayesianEstimator(model, data)

    # Time the parameter-learning call itself. The timestamp used to be taken
    # before get_parameters() ran, so the printed figure did not include it.
    start_time = time.time()
    all_cpds = estimator.get_parameters(
        prior_type=prior_type[i])  #BDeu")#"dirichlet")
    end_time = time.time()
    print("parameter learning in seconds:{}".format(end_time - start_time))

    # The context manager closes the output file even if printing or
    # writing one of the CPDs raises.
    with open(
            r'C:\Users\Adele\Desktop\PhD thesis tests\parameterLearning_' +
            prior_type[i], 'w') as f:
        for c in all_cpds:
            print(c)
            f.write(c.tostring())  # python will convert \n to os.linesep
#for my_node in feature_names:
#   the_cpd = estimator.estimate_cpd(my_node, prior_type="dirichlet")
#  print(the_cpd)

#casas7_model = BayesianModel()
#casas7_model.fit(data, estimator=BayesianEstimator)#MaximumLikelihoodEstimator)
#print(casas7_model.get_cpds())
#casas7_model.get_n
# Estimating the CPD of a single node.
# NOTE(review): `mle`, `alarm_model`, `model_struct` and `samples` are created
# before this excerpt begins.
print(mle.estimate_cpd(node='CVP'))

# Estimating CPDs for all the nodes in the model
print(mle.get_parameters()[:10]) # Show just the first 10 CPDs in the output

# Verifying that the learned parameters are almost equal.
import numpy as np
print(np.allclose(alarm_model.get_cpds('FIO2').values, mle.estimate_cpd('FIO2').values, atol=0.01))

# Fitting the model using the Bayesian Estimator
from pgmpy.estimators import BayesianEstimator

best = BayesianEstimator(model=model_struct, data=samples)

print(best.estimate_cpd(node='FIO2', prior_type="BDeu", equivalent_sample_size=1000))
# Uniform pseudo count for each state. Can also accept an array of the size of CPD.
print(best.estimate_cpd(node='CVP', prior_type="dirichlet", pseudo_counts=100))

# Learning CPDs for all the nodes in the model. The BDeu prior used here only needs
# equivalent_sample_size; a dirichlet prior would instead take pseudo_counts
# (e.g. a dict keyed by node).
print(best.get_parameters(prior_type="BDeu", equivalent_sample_size=1000)[:10])

# Shortcut for learning all the parameters and adding the CPDs to the model.

# Maximum-likelihood fit on a fresh copy of the alarm structure.
model_struct = BayesianModel(ebunch=alarm_model.edges())
model_struct.fit(data=samples, estimator=MaximumLikelihoodEstimator)
print(model_struct.get_cpds('FIO2'))

# Bayesian fit (BDeu prior) on another fresh copy; extra keyword arguments are
# passed to fit() alongside the estimator class.
model_struct = BayesianModel(ebunch=alarm_model.edges())
model_struct.fit(data=samples, estimator=BayesianEstimator, prior_type='BDeu', equivalent_sample_size=1000)
print(model_struct.get_cpds('FIO2'))
class TestBayesianEstimator(unittest.TestCase):
    """Tests for ``BayesianEstimator`` on a small model with edges A -> C and B -> C."""

    def setUp(self):
        """Create the model, two data sets and three estimators shared by the tests."""
        self.m1 = BayesianModel([('A', 'C'), ('B', 'C')])
        self.d1 = pd.DataFrame(data={
            'A': [0, 0, 1],
            'B': [0, 1, 0],
            'C': [1, 1, 0]
        })
        self.d2 = pd.DataFrame(
            data={
                'A': [0, 0, 1, 0, 2, 0, 2, 1, 0, 2],
                'B': ['X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y'],
                'C': [1, 1, 1, 0, 0, 0, 0, 0, 0, 0]
            })
        self.est1 = BayesianEstimator(self.m1, self.d1)
        # est2 declares states ('A': 2, 'C': 23) that never occur in d1.
        self.est2 = BayesianEstimator(self.m1,
                                      self.d1,
                                      state_names={
                                          'A': [0, 1, 2],
                                          'B': [0, 1],
                                          'C': [0, 1, 23]
                                      })
        self.est3 = BayesianEstimator(self.m1, self.d2)

    def test_estimate_cpd_dirichlet(self):
        """Dirichlet prior: explicit pseudo counts are combined with the data counts."""
        cpd_A = self.est1.estimate_cpd('A',
                                       prior_type="dirichlet",
                                       pseudo_counts=[[0], [1]])
        self.assertEqual(cpd_A, TabularCPD('A', 2, [[0.5], [0.5]]))

        cpd_B = self.est1.estimate_cpd('B',
                                       prior_type="dirichlet",
                                       pseudo_counts=[[9], [3]])
        self.assertEqual(cpd_B, TabularCPD('B', 2, [[11.0 / 15], [4.0 / 15]]))

        cpd_C = self.est1.estimate_cpd('C',
                                       prior_type="dirichlet",
                                       pseudo_counts=[[0.4, 0.4, 0.4, 0.4],
                                                      [0.6, 0.6, 0.6, 0.6]])
        self.assertEqual(
            cpd_C,
            TabularCPD('C',
                       2, [[0.2, 0.2, 0.7, 0.4], [0.8, 0.8, 0.3, 0.6]],
                       evidence=['A', 'B'],
                       evidence_card=[2, 2]))

    def test_estimate_cpd_improper_prior(self):
        """All-zero pseudo counts give NaN for the parent configuration absent from d1."""
        cpd_C = self.est1.estimate_cpd('C',
                                       prior_type="dirichlet",
                                       pseudo_counts=[[0, 0, 0, 0],
                                                      [0, 0, 0, 0]])
        # np.nan is used instead of np.NaN: the latter alias was removed in NumPy 2.0.
        cpd_C_correct = (TabularCPD(
            'C',
            2, [[0.0, 0.0, 1.0, np.nan], [1.0, 1.0, 0.0, np.nan]],
            evidence=['A', 'B'],
            evidence_card=[2, 2],
            state_names={
                'A': [0, 1],
                'B': [0, 1],
                'C': [0, 1]
            }))
        # manual comparison because np.nan != np.nan
        self.assertTrue(
            ((cpd_C.values == cpd_C_correct.values)
             | (np.isnan(cpd_C.values) & np.isnan(cpd_C_correct.values))).all())

    def test_estimate_cpd_shortcuts(self):
        """The 'BDeu' and 'K2' prior types produce the expected CPDs."""
        cpd_C1 = self.est2.estimate_cpd('C',
                                        prior_type='BDeu',
                                        equivalent_sample_size=9)
        cpd_C1_correct = TabularCPD('C',
                                    3,
                                    [[0.2, 0.2, 0.6, 1. / 3, 1. / 3, 1. / 3],
                                     [0.6, 0.6, 0.2, 1. / 3, 1. / 3, 1. / 3],
                                     [0.2, 0.2, 0.2, 1. / 3, 1. / 3, 1. / 3]],
                                    evidence=['A', 'B'],
                                    evidence_card=[3, 2])
        self.assertEqual(cpd_C1, cpd_C1_correct)

        cpd_C2 = self.est3.estimate_cpd('C', prior_type='K2')
        cpd_C2_correct = TabularCPD('C',
                                    2,
                                    [[0.5, 0.6, 1. / 3, 2. / 3, 0.75, 2. / 3],
                                     [0.5, 0.4, 2. / 3, 1. / 3, 0.25, 1. / 3]],
                                    evidence=['A', 'B'],
                                    evidence_card=[3, 2])
        self.assertEqual(cpd_C2, cpd_C2_correct)

    def test_get_parameters(self):
        """``get_parameters()`` matches calling ``estimate_cpd`` for each node."""
        cpds = {
            self.est3.estimate_cpd('A'),
            self.est3.estimate_cpd('B'),
            self.est3.estimate_cpd('C')
        }
        self.assertSetEqual(set(self.est3.get_parameters()), cpds)

    def test_get_parameters2(self):
        """``get_parameters`` accepts a per-node dict of dirichlet pseudo counts."""
        pseudo_counts = {
            'A': [[1], [2], [3]],
            'B': [[4], [5]],
            'C': [[6, 6, 6, 6, 6, 6], [7, 7, 7, 7, 7, 7]]
        }
        cpds = {
            self.est3.estimate_cpd('A',
                                   prior_type="dirichlet",
                                   pseudo_counts=pseudo_counts['A']),
            self.est3.estimate_cpd('B',
                                   prior_type="dirichlet",
                                   pseudo_counts=pseudo_counts['B']),
            self.est3.estimate_cpd('C',
                                   prior_type="dirichlet",
                                   pseudo_counts=pseudo_counts['C'])
        }
        self.assertSetEqual(
            set(
                self.est3.get_parameters(prior_type="dirichlet",
                                         pseudo_counts=pseudo_counts)), cpds)

    def tearDown(self):
        """Drop every fixture created in ``setUp``."""
        del self.m1
        del self.d1
        del self.d2
        del self.est1
        del self.est2
        del self.est3
Exemplo n.º 10
0
class TestBayesianEstimator(unittest.TestCase):
    """Tests for ``BayesianEstimator`` on a small model with edges A -> C and B -> C."""

    def setUp(self):
        """Create the model, two data sets and three estimators shared by the tests."""
        self.m1 = BayesianModel([("A", "C"), ("B", "C")])
        self.d1 = pd.DataFrame(data={"A": [0, 0, 1], "B": [0, 1, 0], "C": [1, 1, 0]})
        self.d2 = pd.DataFrame(
            data={
                "A": [0, 0, 1, 0, 2, 0, 2, 1, 0, 2],
                "B": ["X", "Y", "X", "Y", "X", "Y", "X", "Y", "X", "Y"],
                "C": [1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
            }
        )
        self.est1 = BayesianEstimator(self.m1, self.d1)
        # est2 declares states ("A": 2, "C": 23) that never occur in d1.
        self.est2 = BayesianEstimator(
            self.m1, self.d1, state_names={"A": [0, 1, 2], "B": [0, 1], "C": [0, 1, 23]}
        )
        self.est3 = BayesianEstimator(self.m1, self.d2)

    def test_estimate_cpd_dirichlet(self):
        """Dirichlet prior: explicit pseudo counts are combined with the data counts."""
        cpd_A = self.est1.estimate_cpd(
            "A", prior_type="dirichlet", pseudo_counts=[[0], [1]]
        )
        self.assertEqual(cpd_A, TabularCPD("A", 2, [[0.5], [0.5]]))

        cpd_B = self.est1.estimate_cpd(
            "B", prior_type="dirichlet", pseudo_counts=[[9], [3]]
        )
        self.assertEqual(cpd_B, TabularCPD("B", 2, [[11.0 / 15], [4.0 / 15]]))

        cpd_C = self.est1.estimate_cpd(
            "C",
            prior_type="dirichlet",
            pseudo_counts=[[0.4, 0.4, 0.4, 0.4], [0.6, 0.6, 0.6, 0.6]],
        )
        self.assertEqual(
            cpd_C,
            TabularCPD(
                "C",
                2,
                [[0.2, 0.2, 0.7, 0.4], [0.8, 0.8, 0.3, 0.6]],
                evidence=["A", "B"],
                evidence_card=[2, 2],
            ),
        )

    def test_estimate_cpd_improper_prior(self):
        """All-zero pseudo counts give NaN for the parent configuration absent from d1."""
        cpd_C = self.est1.estimate_cpd(
            "C", prior_type="dirichlet", pseudo_counts=[[0, 0, 0, 0], [0, 0, 0, 0]]
        )
        # np.nan is used instead of np.NaN: the latter alias was removed in NumPy 2.0.
        cpd_C_correct = TabularCPD(
            "C",
            2,
            [[0.0, 0.0, 1.0, np.nan], [1.0, 1.0, 0.0, np.nan]],
            evidence=["A", "B"],
            evidence_card=[2, 2],
            state_names={"A": [0, 1], "B": [0, 1], "C": [0, 1]},
        )
        # manual comparison because np.nan != np.nan
        self.assertTrue(
            (
                (cpd_C.values == cpd_C_correct.values)
                | (np.isnan(cpd_C.values) & np.isnan(cpd_C_correct.values))
            ).all()
        )

    def test_estimate_cpd_shortcuts(self):
        """The "BDeu" and "K2" prior types produce the expected CPDs."""
        cpd_C1 = self.est2.estimate_cpd(
            "C", prior_type="BDeu", equivalent_sample_size=9
        )
        cpd_C1_correct = TabularCPD(
            "C",
            3,
            [
                [0.2, 0.2, 0.6, 1.0 / 3, 1.0 / 3, 1.0 / 3],
                [0.6, 0.6, 0.2, 1.0 / 3, 1.0 / 3, 1.0 / 3],
                [0.2, 0.2, 0.2, 1.0 / 3, 1.0 / 3, 1.0 / 3],
            ],
            evidence=["A", "B"],
            evidence_card=[3, 2],
        )
        self.assertEqual(cpd_C1, cpd_C1_correct)

        cpd_C2 = self.est3.estimate_cpd("C", prior_type="K2")
        cpd_C2_correct = TabularCPD(
            "C",
            2,
            [
                [0.5, 0.6, 1.0 / 3, 2.0 / 3, 0.75, 2.0 / 3],
                [0.5, 0.4, 2.0 / 3, 1.0 / 3, 0.25, 1.0 / 3],
            ],
            evidence=["A", "B"],
            evidence_card=[3, 2],
        )
        self.assertEqual(cpd_C2, cpd_C2_correct)

    def test_get_parameters(self):
        """``get_parameters()`` matches calling ``estimate_cpd`` for each node."""
        cpds = {
            self.est3.estimate_cpd("A"),
            self.est3.estimate_cpd("B"),
            self.est3.estimate_cpd("C"),
        }
        self.assertSetEqual(set(self.est3.get_parameters()), cpds)

    def test_get_parameters2(self):
        """``get_parameters`` accepts a per-node dict of dirichlet pseudo counts."""
        pseudo_counts = {
            "A": [[1], [2], [3]],
            "B": [[4], [5]],
            "C": [[6, 6, 6, 6, 6, 6], [7, 7, 7, 7, 7, 7]],
        }
        cpds = {
            self.est3.estimate_cpd(
                "A", prior_type="dirichlet", pseudo_counts=pseudo_counts["A"]
            ),
            self.est3.estimate_cpd(
                "B", prior_type="dirichlet", pseudo_counts=pseudo_counts["B"]
            ),
            self.est3.estimate_cpd(
                "C", prior_type="dirichlet", pseudo_counts=pseudo_counts["C"]
            ),
        }
        self.assertSetEqual(
            set(
                self.est3.get_parameters(
                    prior_type="dirichlet", pseudo_counts=pseudo_counts
                )
            ),
            cpds,
        )

    def tearDown(self):
        """Drop every fixture created in ``setUp``."""
        del self.m1
        del self.d1
        del self.d2
        del self.est1
        del self.est2
        del self.est3