def setUp(self):
    """Build the classic dog-out Bayesian network and a BIFWriter for it."""
    edges = [['family-out', 'dog-out'],
             ['bowel-problem', 'dog-out'],
             ['family-out', 'light-on'],
             ['dog-out', 'hear-bark']]
    cpds = {
        'bowel-problem': np.array([[0.01], [0.99]]),
        'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                             [0.9, 0.1, 0.3, 0.7]]),
        'family-out': np.array([[0.15], [0.85]]),
        'hear-bark': np.array([[0.7, 0.3], [0.01, 0.99]]),
        'light-on': np.array([[0.6, 0.4], [0.05, 0.95]])
    }
    states = {
        'bowel-problem': ['true', 'false'],
        'dog-out': ['true', 'false'],
        'family-out': ['true', 'false'],
        'hear-bark': ['true', 'false'],
        'light-on': ['true', 'false']
    }
    parents = {
        'bowel-problem': [],
        'dog-out': ['family-out', 'bowel-problem'],
        'family-out': [],
        'hear-bark': ['dog-out'],
        'light-on': ['family-out']
    }
    properties = {
        'bowel-problem': ['position = (335, 99)'],
        'dog-out': ['position = (300, 195)'],
        'family-out': ['position = (257, 99)'],
        'hear-bark': ['position = (296, 268)'],
        'light-on': ['position = (218, 195)']
    }
    self.model = BayesianModel(edges)
    # Iterate variables in sorted order so CPD insertion is deterministic.
    tabular_cpds = []
    for var in sorted(cpds.keys()):
        values = cpds[var]
        cpd = TabularCPD(var, len(states[var]), values,
                         evidence=parents[var],
                         evidence_card=[len(states[evidence_var])
                                        for evidence_var in parents[var]])
        tabular_cpds.append(cpd)
    self.model.add_cpds(*tabular_cpds)
    # Fix: the loop variable was previously also named `properties`,
    # shadowing the dict being iterated; renamed to `props`.
    for node, props in properties.items():
        for prop in props:
            prop_name, prop_value = map(lambda t: t.strip(), prop.split('='))
            self.model.node[node][prop_name] = prop_value
    self.writer = BIFWriter(model=self.model)
def test_score_titanic(self):
    """K2 score: the two-parent model must beat the near-edgeless one."""
    scorer = K2Score(self.titanic_data2)
    full_model = BayesianModel([("Sex", "Survived"), ("Pclass", "Survived")])
    self.assertAlmostEqual(scorer.score(full_model), -1891.0630673606006)
    sparse_model = BayesianModel([("Pclass", "Sex")])
    sparse_model.add_nodes_from(["Sex", "Survived", "Pclass"])
    self.assertLess(scorer.score(sparse_model), scorer.score(full_model))
def setUp(self):
    """Five-node BN (A,R -> J; J -> Q; J,G -> L) with VariableElimination."""
    self.bayesian_model = BayesianModel(
        [('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L')])
    cpd_a = TabularCPD('A', 2, values=[[0.2], [0.8]])
    cpd_r = TabularCPD('R', 2, values=[[0.4], [0.6]])
    cpd_j = TabularCPD('J', 2,
                       values=[[0.9, 0.6, 0.7, 0.1],
                               [0.1, 0.4, 0.3, 0.9]],
                       evidence=['A', 'R'], evidence_card=[2, 2])
    cpd_q = TabularCPD('Q', 2,
                       values=[[0.9, 0.2], [0.1, 0.8]],
                       evidence=['J'], evidence_card=[2])
    cpd_l = TabularCPD('L', 2,
                       values=[[0.9, 0.45, 0.8, 0.1],
                               [0.1, 0.55, 0.2, 0.9]],
                       evidence=['J', 'G'], evidence_card=[2, 2])
    cpd_g = TabularCPD('G', 2, values=[[0.6], [0.4]])
    self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
    self.bayesian_inference = VariableElimination(self.bayesian_model)
def setUp(self):
    """A small junction tree with factors, plus the five-node test BN."""
    self.junction_tree = JunctionTree(
        [(('A', 'B'), ('B', 'C')), (('B', 'C'), ('C', 'D'))])
    phi1 = DiscreteFactor(['A', 'B'], [2, 3], range(6))
    phi2 = DiscreteFactor(['B', 'C'], [3, 2], range(6))
    phi3 = DiscreteFactor(['C', 'D'], [2, 2], range(4))
    self.junction_tree.add_factors(phi1, phi2, phi3)

    self.bayesian_model = BayesianModel(
        [('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L')])
    cpd_a = TabularCPD('A', 2, values=[[0.2], [0.8]])
    cpd_r = TabularCPD('R', 2, values=[[0.4], [0.6]])
    cpd_j = TabularCPD('J', 2,
                       values=[[0.9, 0.6, 0.7, 0.1],
                               [0.1, 0.4, 0.3, 0.9]],
                       evidence=['A', 'R'], evidence_card=[2, 2])
    cpd_q = TabularCPD('Q', 2,
                       values=[[0.9, 0.2], [0.1, 0.8]],
                       evidence=['J'], evidence_card=[2])
    cpd_l = TabularCPD('L', 2,
                       values=[[0.9, 0.45, 0.8, 0.1],
                               [0.1, 0.55, 0.2, 0.9]],
                       evidence=['J', 'G'], evidence_card=[2, 2])
    cpd_g = TabularCPD('G', 2, values=[[0.6], [0.4]])
    self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
def setUp(self):
    """Student BN fitted on 1000 rows of random binary data."""
    self.model = BayesianModel([('diff', 'grade'), ('intel', 'grade'),
                                ('intel', 'sat'), ('grade', 'reco')])
    raw = np.random.randint(low=0, high=2, size=(1000, 5))
    frame = pd.DataFrame(raw, columns=['diff', 'grade', 'intel', 'sat', 'reco'])
    self.model.fit(frame)
def test_score_titanic(self):
    """BIC score: the two-parent model must beat the near-edgeless one."""
    scorer = BicScore(self.titanic_data2)
    full_model = BayesianModel([("Sex", "Survived"), ("Pclass", "Survived")])
    self.assertAlmostEqual(scorer.score(full_model), -1896.7250012840179)
    sparse_model = BayesianModel([("Pclass", "Sex")])
    sparse_model.add_nodes_from(["Sex", "Survived", "Pclass"])
    self.assertLess(scorer.score(sparse_model), scorer.score(full_model))
def setUp(self):
    """Bayesian and Markov model fixtures plus a GibbsSampling on the BN."""
    # Bayesian model: diff, intel -> grade
    diff_cpd = TabularCPD('diff', 2, [[0.6], [0.4]])
    intel_cpd = TabularCPD('intel', 2, [[0.7], [0.3]])
    grade_cpd = TabularCPD('grade', 3,
                           [[0.3, 0.05, 0.9, 0.5],
                            [0.4, 0.25, 0.08, 0.3],
                            [0.3, 0.7, 0.02, 0.2]],
                           evidence=['diff', 'intel'],
                           evidence_card=[2, 2])
    self.bayesian_model = BayesianModel()
    self.bayesian_model.add_nodes_from(['diff', 'intel', 'grade'])
    self.bayesian_model.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
    self.bayesian_model.add_cpds(diff_cpd, intel_cpd, grade_cpd)

    # Markov model: chain A-B, C-B, B-D with arbitrary factors
    self.markov_model = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
    phi_ab = DiscreteFactor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6])
    phi_cb = DiscreteFactor(['C', 'B'], [4, 3],
                            [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6])
    phi_bd = DiscreteFactor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3])
    self.markov_model.add_factors(phi_ab, phi_cb, phi_bd)

    self.gibbs = GibbsSampling(self.bayesian_model)
def setUp(self):
    """Toy frame + two candidate models, and the Titanic estimator data."""
    self.d1 = pd.DataFrame(data={'A': [0, 0, 1],
                                 'B': [0, 1, 0],
                                 'C': [1, 1, 0],
                                 'D': ['X', 'Y', 'Z']})
    self.m1 = BayesianModel([('A', 'C'), ('B', 'C'), ('D', 'B')])
    self.m2 = BayesianModel([('C', 'A'), ('C', 'B'), ('A', 'D')])
    # data_link - "https://www.kaggle.com/c/titanic/download/train.csv"
    self.titanic_data = pd.read_csv(
        'pgm/tests/test_estimators/testdata/titanic_train.csv')
    self.titanic_data2 = self.titanic_data[["Survived", "Sex", "Pclass"]]
def test_score_titanic(self):
    """BDeu score: the two-parent model must beat the near-edgeless one."""
    scorer = BdeuScore(self.titanic_data2, equivalent_sample_size=25)
    full_model = BayesianModel([("Sex", "Survived"), ("Pclass", "Survived")])
    self.assertAlmostEqual(scorer.score(full_model), -1892.7383393910427)
    sparse_model = BayesianModel([("Pclass", "Sex")])
    sparse_model.add_nodes_from(["Sex", "Survived", "Pclass"])
    self.assertLess(scorer.score(sparse_model), scorer.score(full_model))
def to_bayesian_model(self):
    """
    Creates a Bayesian Model which is a minimum I-Map for this markov model.

    The ordering of parents may not remain constant. It would depend on the
    ordering of variable in the junction tree (which is not constant) all the
    time.

    Examples
    --------
    >>> from pgm.models import MarkovModel
    >>> from pgm.factors.discrete import DiscreteFactor
    >>> mm = MarkovModel()
    >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
    >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
    ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
    ...                    ('x4', 'x7'), ('x5', 'x7')])
    >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
    >>> mm.add_factors(*phi)
    >>> bm = mm.to_bayesian_model()
    """
    # Imported locally to avoid a circular import between models modules.
    from pgm.models import BayesianModel
    bm = BayesianModel()
    # Maps each variable to the first clique (tuple of variables) in which
    # it was encountered; defaultdict(tuple) means "unseen" == empty tuple.
    var_clique_dict = defaultdict(tuple)
    var_order = []
    # Create a junction tree from the markov model.
    # Creation of clique tree involves triangulation, finding maximal cliques
    # and creating a tree from these cliques
    junction_tree = self.to_junction_tree()
    # create an ordering of the nodes based on the ordering of the clique
    # in which it appeared first
    root_node = junction_tree.nodes()[0]  # NOTE: networkx-1.x list API
    bfs_edges = nx.bfs_edges(junction_tree, root_node)
    # The root clique's variables come first in the ordering.
    for node in root_node:
        var_clique_dict[node] = root_node
        var_order.append(node)
    # Walk cliques in BFS order; each variable is assigned to the first
    # clique that contains it (falsy value == not yet assigned).
    for edge in bfs_edges:
        clique_node = edge[1]
        for node in clique_node:
            if not var_clique_dict[node]:
                var_clique_dict[node] = clique_node
                var_order.append(node)
    # create a bayesian model by adding edges from parent of node to node as
    # par(x_i) = (var(c_k) - x_i) \cap {x_1, ..., x_{i-1}}
    for node_index in range(len(var_order)):
        node = var_order[node_index]
        node_parents = (set(var_clique_dict[node]) -
                        set([node])).intersection(
                            set(var_order[:node_index]))
        bm.add_edges_from([(parent, node) for parent in node_parents])
        # TODO : Convert factor into CPDs
    return bm
def setUp(self):
    """Dog-out BN and a small Markov model, each wrapped in a UAIWriter."""
    self.maxDiff = None
    edges = [['family-out', 'dog-out'],
             ['bowel-problem', 'dog-out'],
             ['family-out', 'light-on'],
             ['dog-out', 'hear-bark']]
    cpds = {'bowel-problem': np.array([[0.01], [0.99]]),
            'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                 [0.9, 0.1, 0.3, 0.7]]),
            'family-out': np.array([[0.15], [0.85]]),
            'hear-bark': np.array([[0.7, 0.3], [0.01, 0.99]]),
            'light-on': np.array([[0.6, 0.4], [0.05, 0.95]])}
    states = {'bowel-problem': ['true', 'false'],
              'dog-out': ['true', 'false'],
              'family-out': ['true', 'false'],
              'hear-bark': ['true', 'false'],
              'light-on': ['true', 'false']}
    parents = {'bowel-problem': [],
               'dog-out': ['bowel-problem', 'family-out'],
               'family-out': [],
               'hear-bark': ['dog-out'],
               'light-on': ['family-out']}
    self.bayesmodel = BayesianModel(edges)
    # CPDs are added in dict insertion order, exactly as before.
    tabular_cpds = [
        TabularCPD(var, len(states[var]), values,
                   evidence=parents[var],
                   evidence_card=[len(states[ev]) for ev in parents[var]])
        for var, values in cpds.items()
    ]
    self.bayesmodel.add_cpds(*tabular_cpds)
    self.bayeswriter = UAIWriter(self.bayesmodel)

    edges = {('var_0', 'var_1'), ('var_0', 'var_2'), ('var_1', 'var_2')}
    self.markovmodel = MarkovModel(edges)
    tables = [(['var_0', 'var_1'],
               ['4.000', '2.400', '1.000', '0.000']),
              (['var_0', 'var_1', 'var_2'],
               ['2.2500', '3.2500', '3.7500', '0.0000', '0.0000', '10.0000',
                '1.8750', '4.0000', '3.3330', '2.0000', '2.0000', '3.4000'])]
    domain = {'var_1': '2', 'var_2': '3', 'var_0': '2'}
    factors = []
    for variables, raw_values in tables:
        cardinality = [int(domain[v]) for v in variables]
        factors.append(DiscreteFactor(variables, cardinality,
                                      [float(x) for x in raw_values]))
    self.markovmodel.add_factors(*factors)
    self.markovwriter = UAIWriter(self.markovmodel)
def test_get_immoralities(self):
    """Immoralities are unconnected parent pairs sharing a child."""
    dag = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
    self.assertEqual(dag.get_immoralities(), {('w', 'x'), ('w', 'z')})
    dag_rev = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
    self.assertEqual(dag_rev.get_immoralities(), {('w', 'x'), ('w', 'z')})
    dag_ext = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'),
                             ('w', 'y'), ('w', 'x')])
    self.assertEqual(dag_ext.get_immoralities(), {('w', 'z')})
def test_moral_graph_with_edge_present_over_parents(self):
    """Moralization when a parent pair is already connected by an edge."""
    G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'),
                       ('b', 'c'), ('a', 'b')])
    moral_graph = G.moralize()
    self.assertListEqual(sorted(moral_graph.nodes()),
                         ['a', 'b', 'c', 'd', 'e'])
    expected = [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')]
    # Undirected edges may come back in either orientation.
    for u, v in moral_graph.edges():
        self.assertTrue((u, v) in expected or (v, u) in expected)
class BaseEliminationTest(TestCase):
    """Shared fixture: the student BN fitted on random binary data."""

    def setUp(self):
        self.model = BayesianModel([('diff', 'grade'), ('intel', 'grade'),
                                    ('intel', 'sat'), ('grade', 'reco')])
        raw = np.random.randint(low=0, high=2, size=(1000, 5))
        frame = pd.DataFrame(raw,
                             columns=['diff', 'grade', 'intel', 'sat', 'reco'])
        self.model.fit(frame)

    def tearDown(self):
        # Subclasses set self.elimination_order before this runs.
        del self.model
        del self.elimination_order
def test_get_independencies(self):
    """Chain and fork imply X _|_ Z | Y; a collider implies X _|_ Z."""
    chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
    self.assertEqual(chain.get_independencies(),
                     Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
    fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
    self.assertEqual(fork.get_independencies(),
                     Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
    collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
    self.assertEqual(collider.get_independencies(),
                     Independencies(('X', 'Z'), ('Z', 'X')))
def setUp(self):
    """Plain DAG fixture plus a small parameterized student model."""
    self.G = BayesianModel([('a', 'd'), ('b', 'd'), ('d', 'e'), ('b', 'c')])
    self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
    diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
    intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
    grade_cpd = TabularCPD('grade', 3,
                           values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                   [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                   [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                           evidence=['diff', 'intel'],
                           evidence_card=[2, 3])
    self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
def test_is_iequivalent(self):
    """I-equivalence holds iff skeleton and immoralities match."""
    G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
    # Comparing against a non-DAG model must raise.
    self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
    G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
    G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
    self.assertTrue(G1.is_iequivalent(G2))
    G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
    self.assertFalse(G3.is_iequivalent(G2))
def test_legal_operations_titanic(self):
    """Counts and contents of legal ops under tabu/indegree constraints."""
    est = self.est_titanic1
    start_model = BayesianModel([("Survived", "Sex"),
                                 ("Pclass", "Age"),
                                 ("Pclass", "Embarked")])

    legal_ops = est._legal_operations(start_model)
    self.assertEqual(len(list(legal_ops)), 20)

    tabu_list = [('-', ("Survived", "Sex")),
                 ('-', ("Survived", "Pclass")),
                 ('flip', ("Age", "Pclass"))]
    legal_ops_tabu = est._legal_operations(start_model, tabu_list=tabu_list)
    self.assertEqual(len(list(legal_ops_tabu)), 18)

    legal_ops_indegree = est._legal_operations(start_model, max_indegree=1)
    self.assertEqual(len(list(legal_ops_indegree)), 11)

    legal_ops_both = est._legal_operations(start_model,
                                           tabu_list=tabu_list,
                                           max_indegree=1)
    legal_ops_both_ref = [
        (('+', ('Embarked', 'Survived')), 10.050632580087608),
        (('+', ('Survived', 'Pclass')), 41.88868046549101),
        (('+', ('Age', 'Survived')), -23.635716036430836),
        (('+', ('Pclass', 'Survived')), 41.81314459373226),
        (('+', ('Sex', 'Pclass')), 4.772261678792802),
        (('-', ('Pclass', 'Age')), 11.546515590731815),
        (('-', ('Pclass', 'Embarked')), -32.171482832532774),
        (('flip', ('Pclass', 'Embarked')), 3.3563814191281836),
        (('flip', ('Survived', 'Sex')), 0.039737027979640516)
    ]
    self.assertSetEqual(set(legal_ops_both), set(legal_ops_both_ref))
def test_get_cpds1(self):
    """get_cpds returns attached CPDs and raises for uncovered nodes."""
    self.model = BayesianModel([('A', 'AB')])
    cpd_a = TabularCPD('A', 2, values=np.random.rand(2, 1))
    cpd_ab = TabularCPD('AB', 2, values=np.random.rand(2, 2),
                        evidence=['A'], evidence_card=[2])
    self.model.add_cpds(cpd_a, cpd_ab)
    self.assertEqual(self.model.get_cpds('A').variable, 'A')
    self.assertEqual(self.model.get_cpds('AB').variable, 'AB')
    # Unknown node -> error; a known node without a CPD also errors.
    self.assertRaises(ValueError, self.model.get_cpds, 'B')
    self.model.add_node('B')
    self.assertRaises(ValueError, self.model.get_cpds, 'B')
def setUp(self):
    """Five-node BN plus a BayesianModelSampling wrapper and empty MM."""
    self.bayesian_model = BayesianModel(
        [('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L')])
    cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
    cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
    cpd_j = TabularCPD('J', 2,
                       [[0.9, 0.6, 0.7, 0.1],
                        [0.1, 0.4, 0.3, 0.9]],
                       ['R', 'A'], [2, 2])
    cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2])
    cpd_l = TabularCPD('L', 2,
                       [[0.9, 0.45, 0.8, 0.1],
                        [0.1, 0.55, 0.2, 0.9]],
                       ['G', 'J'], [2, 2])
    cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
    self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
    self.sampling_inference = BayesianModelSampling(self.bayesian_model)
    self.markov_model = MarkovModel()
def test_estimate_rand(self):
    """Estimation recovers the single B-C dependency (either direction)."""
    est1 = self.est_rand.estimate()
    self.assertSetEqual(set(est1.nodes()), set(['A', 'B', 'C']))
    self.assertTrue(est1.edges() == [('B', 'C')] or
                    est1.edges() == [('C', 'B')])
    # Same result when started from a different initial model.
    est2 = self.est_rand.estimate(
        start=BayesianModel([('A', 'B'), ('A', 'C')]))
    self.assertTrue(est2.edges() == [('B', 'C')] or
                    est2.edges() == [('C', 'B')])
def setUp(self):
    """A five-node BN chain and a four-node Markov loop with factors."""
    self.bayesian = BayesianModel([('a', 'b'), ('b', 'c'),
                                   ('c', 'd'), ('d', 'e')])
    a_cpd = TabularCPD('a', 2, [[0.4, 0.6]])
    b_cpd = TabularCPD('b', 2, [[0.2, 0.4], [0.8, 0.6]],
                       evidence=['a'], evidence_card=[2])
    c_cpd = TabularCPD('c', 2, [[0.1, 0.2], [0.9, 0.8]],
                       evidence=['b'], evidence_card=[2])
    d_cpd = TabularCPD('d', 2, [[0.4, 0.3], [0.6, 0.7]],
                       evidence=['c'], evidence_card=[2])
    e_cpd = TabularCPD('e', 2, [[0.3, 0.2], [0.7, 0.8]],
                       evidence=['d'], evidence_card=[2])
    self.bayesian.add_cpds(a_cpd, b_cpd, c_cpd, d_cpd, e_cpd)

    self.markov = MarkovModel([('a', 'b'), ('b', 'd'),
                               ('a', 'c'), ('c', 'd')])
    phi_ab = DiscreteFactor(['a', 'b'], [2, 2], np.array([100, 1, 1, 100]))
    phi_ac = DiscreteFactor(['a', 'c'], [2, 2], np.array([40, 30, 100, 20]))
    phi_bd = DiscreteFactor(['b', 'd'], [2, 2], np.array([1, 100, 100, 1]))
    phi_cd = DiscreteFactor(['c', 'd'], [2, 2], np.array([60, 60, 40, 40]))
    self.markov.add_factors(phi_ab, phi_ac, phi_bd, phi_cd)
def setUp(self):
    """Three BayesianEstimator fixtures over two small data frames."""
    self.m1 = BayesianModel([('A', 'C'), ('B', 'C')])
    self.d1 = pd.DataFrame(data={'A': [0, 0, 1],
                                 'B': [0, 1, 0],
                                 'C': [1, 1, 0]})
    self.d2 = pd.DataFrame(
        data={'A': [0, 0, 1, 0, 2, 0, 2, 1, 0, 2],
              'B': ['X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y'],
              'C': [1, 1, 1, 0, 0, 0, 0, 0, 0, 0]})
    self.est1 = BayesianEstimator(self.m1, self.d1)
    # est2 supplies explicit state names, including unobserved states.
    self.est2 = BayesianEstimator(self.m1, self.d1,
                                  state_names={'A': [0, 1, 2],
                                               'B': [0, 1],
                                               'C': [0, 1, 23]})
    self.est3 = BayesianEstimator(self.m1, self.d2)
def setUp(self):
    """Random data with C == B, plus HillClimbSearch Titanic fixtures."""
    self.rand_data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)),
                                  columns=list('AB'))
    self.rand_data['C'] = self.rand_data['B']
    self.est_rand = HillClimbSearch(self.rand_data,
                                    scoring_method=K2Score(self.rand_data))
    self.model1 = BayesianModel()
    self.model1.add_nodes_from(['A', 'B', 'C'])
    self.model2 = self.model1.copy()
    self.model2.add_edge('A', 'B')
    # link to dataset: "https://www.kaggle.com/c/titanic/download/train.csv"
    self.titanic_data = pd.read_csv(
        'pgm/tests/test_estimators/testdata/titanic_train.csv')
    self.titanic_data1 = self.titanic_data[
        ["Survived", "Sex", "Pclass", "Age", "Embarked"]]
    self.titanic_data2 = self.titanic_data[["Survived", "Sex", "Pclass"]]
    self.est_titanic1 = HillClimbSearch(self.titanic_data1)
    self.est_titanic2 = HillClimbSearch(self.titanic_data2)
def estimate(self):
    """
    Estimates the `BayesianModel` structure that fits best to the given
    data set, according to the scoring method supplied in the constructor.
    Exhaustively searches through all models. Only estimates network
    structure, no parametrization.

    Returns
    -------
    model: `BayesianModel` instance
        A `BayesianModel` with maximal score.

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> from pgm.estimators import ExhaustiveSearch
    >>> # create random data sample with 3 variables, where B and C are identical:
    >>> data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=list('AB'))
    >>> data['C'] = data['B']
    >>> est = ExhaustiveSearch(data)
    >>> best_model = est.estimate()
    >>> best_model
    <pgm.models.BayesianModel.BayesianModel object at 0x7f695c535470>
    >>> best_model.edges()
    [('B', 'C')]
    """
    # Pick the highest-scoring DAG among all candidate DAGs.
    winning_dag = max(self.all_dags(), key=self.scoring_method.score)
    # Rebuild it as a BayesianModel with deterministic node/edge order.
    model = BayesianModel()
    model.add_nodes_from(sorted(winning_dag.nodes()))
    model.add_edges_from(sorted(winning_dag.edges()))
    return model
def get_model(self):
    """Assemble a BayesianModel (edges, CPDs, node properties) from the
    parsed network data held on this reader."""
    model = BayesianModel(self.get_edges())
    model.name = self.network_name

    cpd_list = []
    for var, values in self.variable_CPD.items():
        card = [len(self.variable_states[ev])
                for ev in self.variable_parents[var]]
        cpd_list.append(TabularCPD(var, len(self.variable_states[var]),
                                   values,
                                   evidence=self.variable_parents[var],
                                   evidence_card=card,
                                   state_names=self.get_states()))
    model.add_cpds(*cpd_list)

    # Attach "name = value" properties to their nodes; skip missing entries.
    for node, props in self.variable_property.items():
        for prop in props:
            if prop is not None:
                prop_name, prop_value = (t.strip() for t in prop.split('='))
                model.node[node][prop_name] = prop_value
    return model
def get_model(self):
    """
    Returns the model instance of the ProbModel.

    Return
    ---------------
    model: an instance of BayesianModel.

    Examples
    -------
    >>> reader = ProbModelXMLReader()
    >>> reader.get_model()
    """
    # Only Bayesian networks are supported; anything else is rejected below.
    if self.probnet.get('type') == "BayesianNetwork":
        model = BayesianModel(self.probnet['edges'].keys())
        tabular_cpds = []
        cpds = self.probnet['Potentials']
        for cpd in cpds:
            # First key of 'Variables' is the potential's target variable.
            var = list(cpd['Variables'].keys())[0]
            states = self.probnet['Variables'][var]['States']
            # Remaining entries under the target variable are its evidence.
            evidence = cpd['Variables'][var]
            evidence_card = [
                len(self.probnet['Variables'][evidence_var]['States'])
                for evidence_var in evidence
            ]
            # 'Values' is a whitespace-separated flat list; reshape it to
            # (n_states, n_evidence_configurations).
            arr = list(map(float, cpd['Values'].split()))
            values = np.array(arr)
            values = values.reshape(
                (len(states), values.size // len(states)))
            tabular_cpds.append(
                TabularCPD(var, len(states), values, evidence, evidence_card))
        model.add_cpds(*tabular_cpds)

        # Copy all per-variable attributes onto the graph nodes.
        variables = model.nodes()
        for var in variables:
            for prop_name, prop_value in self.probnet['Variables'][
                    var].items():
                model.node[var][prop_name] = prop_value
        # Copy all per-edge attributes onto the graph edges.
        edges = model.edges()
        for edge in edges:
            for prop_name, prop_value in self.probnet['edges'][edge].items(
            ):
                model.edge[edge[0]][edge[1]][prop_name] = prop_value
        return model
    else:
        raise ValueError("Please specify only Bayesian Network.")
def __init__(self, model):
    """Validate `model` and index its cardinalities and factors per variable.

    Supports BayesianModel, MarkovModel, FactorGraph, JunctionTree and
    DynamicBayesianNetwork inputs.
    """
    self.model = model
    model.check_model()

    # Junction tree nodes are cliques (tuples); flatten them to variables.
    if isinstance(model, JunctionTree):
        self.variables = set(chain(*model.nodes()))
    else:
        self.variables = model.nodes()

    self.cardinality = {}
    self.factors = defaultdict(list)

    if isinstance(model, BayesianModel):
        # One factor per CPD, registered under every variable it mentions.
        for node in model.nodes():
            cpd = model.get_cpds(node)
            as_factor = cpd.to_factor()
            self.cardinality[node] = cpd.variable_card
            for var in cpd.variables:
                self.factors[var].append(as_factor)
    elif isinstance(model, (MarkovModel, FactorGraph, JunctionTree)):
        self.cardinality = model.get_cardinality()
        for factor in model.get_factors():
            for var in factor.variables:
                self.factors[var].append(factor)
    elif isinstance(model, DynamicBayesianNetwork):
        # Time slice 0 becomes the "start" model.
        self.start_bayesian_model = BayesianModel(model.get_intra_edges(0))
        self.start_bayesian_model.add_cpds(*model.get_cpds(time_slice=0))
        # Slice 1 plus the inter-slice edges form the 1.5-slice model.
        cpd_inter = [model.get_cpds(node)
                     for node in model.get_interface_nodes(1)]
        self.interface_nodes = model.get_interface_nodes(0)
        self.one_and_half_model = BayesianModel(
            model.get_inter_edges() + model.get_intra_edges(1))
        self.one_and_half_model.add_cpds(
            *(model.get_cpds(time_slice=1) + cpd_inter))
def minimal_imap(self, order):
    """
    Returns a Bayesian Model which is minimal IMap of the Joint Probability
    Distribution considering the order of the variables.

    Parameters
    ----------
    order: array-like
        The order of the random variables.

    Examples
    --------
    >>> import numpy as np
    >>> from pgm.factors.discrete import JointProbabilityDistribution
    >>> prob = JointProbabilityDistribution(['x1', 'x2', 'x3'], [2, 3, 2], np.ones(12)/12)
    >>> bayesian_model = prob.minimal_imap(order=['x2', 'x1', 'x3'])
    >>> bayesian_model
    <pgm.models.models.models at 0x7fd7440a9320>
    >>> bayesian_model.edges()
    [('x1', 'x3'), ('x2', 'x3')]
    """
    from pgm.models import BayesianModel

    def all_subsets(u):
        # Yield every subset of u, smallest first (including the empty one).
        for size in range(len(u) + 1):
            yield from itertools.combinations(u, size)

    G = BayesianModel()
    for idx in range(len(order)):
        predecessors = order[:idx]
        for subset in all_subsets(predecessors):
            # A proper subset that renders the current variable independent
            # of the remaining predecessors identifies its parent set.
            independent = (
                len(subset) < len(predecessors) and
                self.check_independence([order[idx]],
                                        set(predecessors) - set(subset),
                                        subset, True))
            if independent:
                G.add_edges_from([(parent, order[idx])
                                  for parent in subset])
    return G
def get_model(self):
    """
    Returns the fitted bayesian model

    Example
    ----------
    >>> from pgm.readwrite import BIFReader
    >>> reader = BIFReader("bif_test.bif")
    >>> reader.get_model()
    <pgm.models.BayesianModel.BayesianModel object at 0x7f20af154320>
    """
    try:
        model = BayesianModel(self.variable_edges)
        model.name = self.network_name
        model.add_nodes_from(self.variable_names)

        # Build CPDs in sorted variable order for determinism.
        cpd_list = []
        for var in sorted(self.variable_cpds.keys()):
            parent_vars = self.variable_parents[var]
            cpd_list.append(TabularCPD(
                var, len(self.variable_states[var]),
                self.variable_cpds[var],
                evidence=parent_vars,
                evidence_card=[len(self.variable_states[p])
                               for p in parent_vars]))
        model.add_cpds(*cpd_list)

        # Attach "name = value" properties to their nodes.
        for node, props in self.variable_properties.items():
            for prop in props:
                prop_name, prop_value = (t.strip() for t in prop.split('='))
                model.node[node][prop_name] = prop_value
        return model
    except AttributeError:
        raise AttributeError(
            'First get states of variables, edges, parents and network name'
        )