Example #1
    def setUp(self):
        edges = [['family-out', 'dog-out'], ['bowel-problem', 'dog-out'],
                 ['family-out', 'light-on'], ['dog-out', 'hear-bark']]

        cpds = {
            'bowel-problem': np.array([[0.01], [0.99]]),
            'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                 [0.9, 0.1, 0.3, 0.7]]),
            'family-out': np.array([[0.15], [0.85]]),
            'hear-bark': np.array([[0.7, 0.3], [0.01, 0.99]]),
            'light-on': np.array([[0.6, 0.4], [0.05, 0.95]])
        }

        states = {
            'bowel-problem': ['true', 'false'],
            'dog-out': ['true', 'false'],
            'family-out': ['true', 'false'],
            'hear-bark': ['true', 'false'],
            'light-on': ['true', 'false']
        }

        parents = {
            'bowel-problem': [],
            'dog-out': ['family-out', 'bowel-problem'],
            'family-out': [],
            'hear-bark': ['dog-out'],
            'light-on': ['family-out']
        }

        properties = {
            'bowel-problem': ['position = (335, 99)'],
            'dog-out': ['position = (300, 195)'],
            'family-out': ['position = (257, 99)'],
            'hear-bark': ['position = (296, 268)'],
            'light-on': ['position = (218, 195)']
        }

        self.model = BayesianModel(edges)

        tabular_cpds = []
        for var in sorted(cpds.keys()):
            values = cpds[var]
            cpd = TabularCPD(var,
                             len(states[var]),
                             values,
                             evidence=parents[var],
                             evidence_card=[
                                 len(states[evidence_var])
                                 for evidence_var in parents[var]
                             ])
            tabular_cpds.append(cpd)
        self.model.add_cpds(*tabular_cpds)

        for node, node_properties in properties.items():
            for prop in node_properties:
                prop_name, prop_value = map(lambda t: t.strip(),
                                            prop.split('='))
                self.model.node[node][prop_name] = prop_value

        self.writer = BIFWriter(model=self.model)
Example #2
 def test_score_titanic(self):
     scorer = K2Score(self.titanic_data2)
     titanic = BayesianModel([("Sex", "Survived"), ("Pclass", "Survived")])
     self.assertAlmostEqual(scorer.score(titanic), -1891.0630673606006)
     titanic2 = BayesianModel([("Pclass", "Sex"), ])
     titanic2.add_nodes_from(["Sex", "Survived", "Pclass"])
     self.assertLess(scorer.score(titanic2), scorer.score(titanic))
Example #3
    def setUp(self):
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'),
                                             ('J', 'Q'), ('J', 'L'),
                                             ('G', 'L')])
        cpd_a = TabularCPD('A', 2, values=[[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, values=[[0.4], [0.6]])
        cpd_j = TabularCPD('J',
                           2,
                           values=[[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                           evidence=['A', 'R'],
                           evidence_card=[2, 2])
        cpd_q = TabularCPD('Q',
                           2,
                           values=[[0.9, 0.2], [0.1, 0.8]],
                           evidence=['J'],
                           evidence_card=[2])
        cpd_l = TabularCPD('L',
                           2,
                           values=[[0.9, 0.45, 0.8, 0.1],
                                   [0.1, 0.55, 0.2, 0.9]],
                           evidence=['J', 'G'],
                           evidence_card=[2, 2])
        cpd_g = TabularCPD('G', 2, values=[[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)

        self.bayesian_inference = VariableElimination(self.bayesian_model)
Example #4
    def setUp(self):
        self.junction_tree = JunctionTree([(('A', 'B'), ('B', 'C')),
                                           (('B', 'C'), ('C', 'D'))])
        phi1 = DiscreteFactor(['A', 'B'], [2, 3], range(6))
        phi2 = DiscreteFactor(['B', 'C'], [3, 2], range(6))
        phi3 = DiscreteFactor(['C', 'D'], [2, 2], range(4))
        self.junction_tree.add_factors(phi1, phi2, phi3)

        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'),
                                             ('J', 'Q'), ('J', 'L'),
                                             ('G', 'L')])
        cpd_a = TabularCPD('A', 2, values=[[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, values=[[0.4], [0.6]])
        cpd_j = TabularCPD('J',
                           2,
                           values=[[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                           evidence=['A', 'R'],
                           evidence_card=[2, 2])
        cpd_q = TabularCPD('Q',
                           2,
                           values=[[0.9, 0.2], [0.1, 0.8]],
                           evidence=['J'],
                           evidence_card=[2])
        cpd_l = TabularCPD('L',
                           2,
                           values=[[0.9, 0.45, 0.8, 0.1],
                                   [0.1, 0.55, 0.2, 0.9]],
                           evidence=['J', 'G'],
                           evidence_card=[2, 2])
        cpd_g = TabularCPD('G', 2, values=[[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
Example #5
 def setUp(self):
     self.model = BayesianModel([('diff', 'grade'), ('intel', 'grade'),
                                 ('intel', 'sat'), ('grade', 'reco')])
     raw_data = np.random.randint(low=0, high=2, size=(1000, 5))
     data = pd.DataFrame(raw_data,
                         columns=['diff', 'grade', 'intel', 'sat', 'reco'])
     self.model.fit(data)
Example #6
 def test_score_titanic(self):
     scorer = BicScore(self.titanic_data2)
     titanic = BayesianModel([("Sex", "Survived"), ("Pclass", "Survived")])
     self.assertAlmostEqual(scorer.score(titanic), -1896.7250012840179)
     titanic2 = BayesianModel([("Pclass", "Sex"), ])
     titanic2.add_nodes_from(["Sex", "Survived", "Pclass"])
     self.assertLess(scorer.score(titanic2), scorer.score(titanic))
Example #7
    def setUp(self):
        # A test Bayesian model
        diff_cpd = TabularCPD('diff', 2, [[0.6], [0.4]])
        intel_cpd = TabularCPD('intel', 2, [[0.7], [0.3]])
        grade_cpd = TabularCPD('grade',
                               3,
                               [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, 0.08, 0.3],
                                [0.3, 0.7, 0.02, 0.2]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 2])
        self.bayesian_model = BayesianModel()
        self.bayesian_model.add_nodes_from(['diff', 'intel', 'grade'])
        self.bayesian_model.add_edges_from([('diff', 'grade'),
                                            ('intel', 'grade')])
        self.bayesian_model.add_cpds(diff_cpd, intel_cpd, grade_cpd)

        # A test Markov model
        self.markov_model = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
        factor_ab = DiscreteFactor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6])
        factor_cb = DiscreteFactor(['C', 'B'], [4, 3],
                                   [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6])
        factor_bd = DiscreteFactor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3])
        self.markov_model.add_factors(factor_ab, factor_cb, factor_bd)

        self.gibbs = GibbsSampling(self.bayesian_model)
Example #8
    def setUp(self):
        self.d1 = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0], 'D': ['X', 'Y', 'Z']})
        self.m1 = BayesianModel([('A', 'C'), ('B', 'C'), ('D', 'B')])
        self.m2 = BayesianModel([('C', 'A'), ('C', 'B'), ('A', 'D')])

        # data_link - "https://www.kaggle.com/c/titanic/download/train.csv"
        self.titanic_data = pd.read_csv('pgm/tests/test_estimators/testdata/titanic_train.csv')
        self.titanic_data2 = self.titanic_data[["Survived", "Sex", "Pclass"]]
Example #9
 def test_score_titanic(self):
     scorer = BdeuScore(self.titanic_data2, equivalent_sample_size=25)
     titanic = BayesianModel([("Sex", "Survived"), ("Pclass", "Survived")])
     self.assertAlmostEqual(scorer.score(titanic), -1892.7383393910427)
     titanic2 = BayesianModel([
         ("Pclass", "Sex"),
     ])
     titanic2.add_nodes_from(["Sex", "Survived", "Pclass"])
     self.assertLess(scorer.score(titanic2), scorer.score(titanic))
Example #10
    def to_bayesian_model(self):
        """
        Creates a Bayesian model which is a minimum I-map for this Markov model.

        The ordering of parents may not remain constant: it depends on the
        ordering of the variables in the junction tree, which is itself not
        fixed.

        Examples
        --------
        >>> from pgm.models import MarkovModel
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> mm = MarkovModel()
        >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                    ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
        >>> mm.add_factors(*phi)
        >>> bm = mm.to_bayesian_model()
        """
        from pgm.models import BayesianModel

        bm = BayesianModel()
        var_clique_dict = defaultdict(tuple)
        var_order = []

        # Create a junction tree from the Markov model. Building the clique
        # tree involves triangulation, finding the maximal cliques, and
        # creating a tree from those cliques.
        junction_tree = self.to_junction_tree()

        # Create an ordering of the nodes based on the clique in which each
        # node first appears.
        root_node = junction_tree.nodes()[0]
        bfs_edges = nx.bfs_edges(junction_tree, root_node)
        for node in root_node:
            var_clique_dict[node] = root_node
            var_order.append(node)
        for edge in bfs_edges:
            clique_node = edge[1]
            for node in clique_node:
                if not var_clique_dict[node]:
                    var_clique_dict[node] = clique_node
                    var_order.append(node)

        # Create a Bayesian model by adding an edge from each parent of a node
        # to the node, where par(x_i) = (var(c_k) - {x_i}) \cap {x_1, ..., x_{i-1}}
        for node_index in range(len(var_order)):
            node = var_order[node_index]
            node_parents = (set(var_clique_dict[node]) -
                            set([node])).intersection(
                                set(var_order[:node_index]))
            bm.add_edges_from([(parent, node) for parent in node_parents])
            # TODO : Convert factor into CPDs
        return bm
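
The parent-set rule in the loop above can be checked in isolation. A minimal standalone sketch, using a hypothetical variable order and clique assignment (not produced by a real junction tree), that computes par(x_i) = (var(c_k) - {x_i}) ∩ {x_1, ..., x_{i-1}}:

# Hypothetical data: each variable mapped to the clique in which it first
# appears, plus a BFS-derived variable order.
var_order = ['x1', 'x3', 'x4', 'x2']
var_clique_dict = {'x1': ('x1', 'x3'), 'x3': ('x1', 'x3'),
                   'x4': ('x1', 'x4', 'x2'), 'x2': ('x1', 'x4', 'x2')}

edges = []
for node_index, node in enumerate(var_order):
    # Parents = other members of the node's clique that appear earlier in the order.
    node_parents = (set(var_clique_dict[node]) - {node}) & set(var_order[:node_index])
    edges.extend((parent, node) for parent in node_parents)

print(sorted(edges))  # [('x1', 'x2'), ('x1', 'x3'), ('x1', 'x4'), ('x4', 'x2')]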
Example #11
    def setUp(self):
        self.maxDiff = None
        edges = [['family-out', 'dog-out'],
                 ['bowel-problem', 'dog-out'],
                 ['family-out', 'light-on'],
                 ['dog-out', 'hear-bark']]
        cpds = {'bowel-problem': np.array([[0.01],
                                           [0.99]]),
                'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                     [0.9, 0.1, 0.3, 0.7]]),
                'family-out': np.array([[0.15],
                                        [0.85]]),
                'hear-bark': np.array([[0.7, 0.3],
                                       [0.01, 0.99]]),
                'light-on': np.array([[0.6, 0.4],
                                      [0.05, 0.95]])}
        states = {'bowel-problem': ['true', 'false'],
                  'dog-out': ['true', 'false'],
                  'family-out': ['true', 'false'],
                  'hear-bark': ['true', 'false'],
                  'light-on': ['true', 'false']}
        parents = {'bowel-problem': [],
                   'dog-out': ['bowel-problem', 'family-out'],
                   'family-out': [],
                   'hear-bark': ['dog-out'],
                   'light-on': ['family-out']}

        self.bayesmodel = BayesianModel(edges)

        tabular_cpds = []
        for var, values in cpds.items():
            cpd = TabularCPD(var, len(states[var]), values,
                             evidence=parents[var],
                             evidence_card=[len(states[evidence_var])
                                            for evidence_var in parents[var]])
            tabular_cpds.append(cpd)
        self.bayesmodel.add_cpds(*tabular_cpds)
        self.bayeswriter = UAIWriter(self.bayesmodel)

        edges = {('var_0', 'var_1'), ('var_0', 'var_2'), ('var_1', 'var_2')}
        self.markovmodel = MarkovModel(edges)
        tables = [(['var_0', 'var_1'],
                   ['4.000', '2.400', '1.000', '0.000']),
                  (['var_0', 'var_1', 'var_2'],
                   ['2.2500', '3.2500', '3.7500', '0.0000', '0.0000', '10.0000',
                    '1.8750', '4.0000', '3.3330', '2.0000', '2.0000', '3.4000'])]
        domain = {'var_1': '2', 'var_2': '3', 'var_0': '2'}
        factors = []
        for table in tables:
            variables = table[0]
            cardinality = [int(domain[var]) for var in variables]
            values = list(map(float, table[1]))
            factor = DiscreteFactor(variables, cardinality, values)
            factors.append(factor)
        self.markovmodel.add_factors(*factors)
        self.markovwriter = UAIWriter(self.markovmodel)
Example #12
 def test_get_immoralities(self):
     G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
     self.assertEqual(G.get_immoralities(), {('w', 'x'), ('w', 'z')})
     G1 = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
     self.assertEqual(G1.get_immoralities(), {('w', 'x'), ('w', 'z')})
     G2 = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y'),
                         ('w', 'x')])
     self.assertEqual(G2.get_immoralities(), {('w', 'z')})
Example #13
 def test_moral_graph_with_edge_present_over_parents(self):
     G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'), ('b', 'c'),
                        ('a', 'b')])
     moral_graph = G.moralize()
     self.assertListEqual(sorted(moral_graph.nodes()),
                          ['a', 'b', 'c', 'd', 'e'])
     for edge in moral_graph.edges():
         self.assertTrue(edge in [('a', 'b'), ('c', 'b'), ('d', 'a'),
                                  ('d', 'b'), ('d', 'e')]
                         or (edge[1], edge[0]) in [('a', 'b'), ('c', 'b'),
                                                   ('d', 'a'), ('d', 'b'),
                                                   ('d', 'e')])
Example #14
class BaseEliminationTest(TestCase):
    def setUp(self):
        self.model = BayesianModel([('diff', 'grade'), ('intel', 'grade'),
                                    ('intel', 'sat'), ('grade', 'reco')])
        raw_data = np.random.randint(low=0, high=2, size=(1000, 5))
        data = pd.DataFrame(raw_data,
                            columns=['diff', 'grade', 'intel', 'sat', 'reco'])
        self.model.fit(data)

    def tearDown(self):
        del self.model
        del self.elimination_order
Example #15
 def test_get_independencies(self):
     chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
     self.assertEqual(chain.get_independencies(),
                      Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
     fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
     self.assertEqual(fork.get_independencies(),
                      Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
     collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
     self.assertEqual(collider.get_independencies(),
                      Independencies(('X', 'Z'), ('Z', 'X')))
Example #16
 def setUp(self):
     self.G = BayesianModel([('a', 'd'), ('b', 'd'), ('d', 'e'),
                             ('b', 'c')])
     self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
     diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
     intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
     grade_cpd = TabularCPD('grade',
                            3,
                            values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                    [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                    [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                            evidence=['diff', 'intel'],
                            evidence_card=[2, 3])
     self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
Example #17
 def test_is_iequivalent(self):
     G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
     self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
     G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
     G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
     self.assertTrue(G1.is_iequivalent(G2))
     G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
     self.assertFalse(G3.is_iequivalent(G2))
Example #18
    def test_legal_operations_titanic(self):
        est = self.est_titanic1
        start_model = BayesianModel([("Survived", "Sex"), ("Pclass", "Age"),
                                     ("Pclass", "Embarked")])

        legal_ops = est._legal_operations(start_model)
        self.assertEqual(len(list(legal_ops)), 20)

        tabu_list = [('-', ("Survived", "Sex")), ('-', ("Survived", "Pclass")),
                     ('flip', ("Age", "Pclass"))]
        legal_ops_tabu = est._legal_operations(start_model,
                                               tabu_list=tabu_list)
        self.assertEqual(len(list(legal_ops_tabu)), 18)

        legal_ops_indegree = est._legal_operations(start_model, max_indegree=1)
        self.assertEqual(len(list(legal_ops_indegree)), 11)

        legal_ops_both = est._legal_operations(start_model,
                                               tabu_list=tabu_list,
                                               max_indegree=1)
        legal_ops_both_ref = [
            (('+', ('Embarked', 'Survived')), 10.050632580087608),
            (('+', ('Survived', 'Pclass')), 41.88868046549101),
            (('+', ('Age', 'Survived')), -23.635716036430836),
            (('+', ('Pclass', 'Survived')), 41.81314459373226),
            (('+', ('Sex', 'Pclass')), 4.772261678792802),
            (('-', ('Pclass', 'Age')), 11.546515590731815),
            (('-', ('Pclass', 'Embarked')), -32.171482832532774),
            (('flip', ('Pclass', 'Embarked')), 3.3563814191281836),
            (('flip', ('Survived', 'Sex')), 0.039737027979640516)
        ]
        self.assertSetEqual(set(legal_ops_both), set(legal_ops_both_ref))
Example #19
    def test_get_cpds1(self):
        self.model = BayesianModel([('A', 'AB')])
        cpd_a = TabularCPD('A', 2, values=np.random.rand(2, 1))
        cpd_ab = TabularCPD('AB',
                            2,
                            values=np.random.rand(2, 2),
                            evidence=['A'],
                            evidence_card=[2])

        self.model.add_cpds(cpd_a, cpd_ab)
        self.assertEqual(self.model.get_cpds('A').variable, 'A')
        self.assertEqual(self.model.get_cpds('AB').variable, 'AB')
        self.assertRaises(ValueError, self.model.get_cpds, 'B')

        self.model.add_node('B')
        self.assertRaises(ValueError, self.model.get_cpds, 'B')
Example #20
 def setUp(self):
     self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'),
                                          ('J', 'Q'), ('J', 'L'),
                                          ('G', 'L')])
     cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
     cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
     cpd_j = TabularCPD('J', 2,
                        [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                        ['R', 'A'], [2, 2])
     cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2])
     cpd_l = TabularCPD('L', 2,
                        [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
                        ['G', 'J'], [2, 2])
     cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
     self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
     self.sampling_inference = BayesianModelSampling(self.bayesian_model)
     self.markov_model = MarkovModel()
Example #21
    def test_estimate_rand(self):
        est1 = self.est_rand.estimate()
        self.assertSetEqual(set(est1.nodes()), set(['A', 'B', 'C']))
        self.assertTrue(est1.edges() == [('B', 'C')]
                        or est1.edges() == [('C', 'B')])

        est2 = self.est_rand.estimate(start=BayesianModel([('A',
                                                            'B'), ('A', 'C')]))
        self.assertTrue(est2.edges() == [('B', 'C')]
                        or est2.edges() == [('C', 'B')])
Example #22
    def setUp(self):
        self.bayesian = BayesianModel([('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e')])
        a_cpd = TabularCPD('a', 2, [[0.4, 0.6]])
        b_cpd = TabularCPD('b', 2, [[0.2, 0.4], [0.8, 0.6]], evidence=['a'],
                           evidence_card=[2])
        c_cpd = TabularCPD('c', 2, [[0.1, 0.2], [0.9, 0.8]], evidence=['b'],
                           evidence_card=[2])
        d_cpd = TabularCPD('d', 2, [[0.4, 0.3], [0.6, 0.7]], evidence=['c'],
                           evidence_card=[2])
        e_cpd = TabularCPD('e', 2, [[0.3, 0.2], [0.7, 0.8]], evidence=['d'],
                           evidence_card=[2])
        self.bayesian.add_cpds(a_cpd, b_cpd, c_cpd, d_cpd, e_cpd)

        self.markov = MarkovModel([('a', 'b'), ('b', 'd'), ('a', 'c'), ('c', 'd')])
        factor_1 = DiscreteFactor(['a', 'b'], [2, 2], np.array([100, 1, 1, 100]))
        factor_2 = DiscreteFactor(['a', 'c'], [2, 2], np.array([40, 30, 100, 20]))
        factor_3 = DiscreteFactor(['b', 'd'], [2, 2], np.array([1, 100, 100, 1]))
        factor_4 = DiscreteFactor(['c', 'd'], [2, 2], np.array([60, 60, 40, 40]))
        self.markov.add_factors(factor_1, factor_2, factor_3, factor_4)
Example #23
 def setUp(self):
     self.m1 = BayesianModel([('A', 'C'), ('B', 'C')])
     self.d1 = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
     self.d2 = pd.DataFrame(data={'A': [0, 0, 1, 0, 2, 0, 2, 1, 0, 2],
                                  'B': ['X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y'],
                                  'C': [1, 1, 1, 0, 0, 0, 0, 0, 0, 0]})
     self.est1 = BayesianEstimator(self.m1, self.d1)
     self.est2 = BayesianEstimator(self.m1, self.d1, state_names={'A': [0, 1, 2],
                                                                  'B': [0, 1],
                                                                  'C': [0, 1, 23]})
     self.est3 = BayesianEstimator(self.m1, self.d2)
Example #24
    def setUp(self):
        self.rand_data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)),
                                      columns=list('AB'))
        self.rand_data['C'] = self.rand_data['B']
        self.est_rand = HillClimbSearch(self.rand_data,
                                        scoring_method=K2Score(self.rand_data))
        self.model1 = BayesianModel()
        self.model1.add_nodes_from(['A', 'B', 'C'])
        self.model2 = self.model1.copy()
        self.model2.add_edge('A', 'B')

        # link to dataset: "https://www.kaggle.com/c/titanic/download/train.csv"
        self.titanic_data = pd.read_csv(
            'pgm/tests/test_estimators/testdata/titanic_train.csv')
        self.titanic_data1 = self.titanic_data[[
            "Survived", "Sex", "Pclass", "Age", "Embarked"
        ]]
        self.titanic_data2 = self.titanic_data[["Survived", "Sex", "Pclass"]]
        self.est_titanic1 = HillClimbSearch(self.titanic_data1)
        self.est_titanic2 = HillClimbSearch(self.titanic_data2)
Example #25
    def estimate(self):
        """
        Estimates the `BayesianModel` structure that best fits the given data set,
        according to the scoring method supplied in the constructor, by exhaustively
        searching through all possible models. Only the network structure is
        estimated, not its parametrization.

        Returns
        -------
        model: `BayesianModel` instance
            A `BayesianModel` with maximal score.

        Examples
        --------
        >>> import pandas as pd
        >>> import numpy as np
        >>> from pgm.estimators import ExhaustiveSearch
        >>> # create random data sample with 3 variables, where B and C are identical:
        >>> data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=list('AB'))
        >>> data['C'] = data['B']
        >>> est = ExhaustiveSearch(data)
        >>> best_model = est.estimate()
        >>> best_model
        <pgm.models.BayesianModel.BayesianModel object at 0x7f695c535470>
        >>> best_model.edges()
        [('B', 'C')]
        """

        best_dag = max(self.all_dags(), key=self.scoring_method.score)

        best_model = BayesianModel()
        best_model.add_nodes_from(sorted(best_dag.nodes()))
        best_model.add_edges_from(sorted(best_dag.edges()))
        return best_model
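
The doctest above uses the estimator's default scorer. A minimal sketch, assuming `K2Score` is importable from `pgm.estimators` (as it is used in Example #24) and can be passed through the `scoring_method` constructor argument:

import numpy as np
import pandas as pd
from pgm.estimators import ExhaustiveSearch, K2Score  # assumes K2Score lives here, as in Example #24

# Random sample with three variables, where B and C are identical.
data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=list('AB'))
data['C'] = data['B']
est = ExhaustiveSearch(data, scoring_method=K2Score(data))
best_model = est.estimate()
print(best_model.edges())  # expected: [('B', 'C')] or [('C', 'B')]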
Example #26
    def get_model(self):
        model = BayesianModel(self.get_edges())
        model.name = self.network_name

        tabular_cpds = []
        for var, values in self.variable_CPD.items():
            evidence_card = [
                len(self.variable_states[evidence_var])
                for evidence_var in self.variable_parents[var]
            ]
            cpd = TabularCPD(var,
                             len(self.variable_states[var]),
                             values,
                             evidence=self.variable_parents[var],
                             evidence_card=evidence_card,
                             state_names=self.get_states())
            tabular_cpds.append(cpd)

        model.add_cpds(*tabular_cpds)

        for node, properties in self.variable_property.items():
            for prop in properties:
                if prop is not None:
                    prop_name, prop_value = map(lambda t: t.strip(),
                                                prop.split('='))
                    model.node[node][prop_name] = prop_value

        return model
Example #27
    def get_model(self):
        """
        Returns the model instance of the ProbModel.

        Returns
        -------
        model: an instance of BayesianModel.

        Examples
        --------
        >>> reader = ProbModelXMLReader()
        >>> reader.get_model()
        """
        if self.probnet.get('type') == "BayesianNetwork":
            model = BayesianModel(self.probnet['edges'].keys())

            tabular_cpds = []
            cpds = self.probnet['Potentials']
            for cpd in cpds:
                var = list(cpd['Variables'].keys())[0]
                states = self.probnet['Variables'][var]['States']
                evidence = cpd['Variables'][var]
                evidence_card = [
                    len(self.probnet['Variables'][evidence_var]['States'])
                    for evidence_var in evidence
                ]
                arr = list(map(float, cpd['Values'].split()))
                values = np.array(arr)
                values = values.reshape(
                    (len(states), values.size // len(states)))
                tabular_cpds.append(
                    TabularCPD(var, len(states), values, evidence,
                               evidence_card))

            model.add_cpds(*tabular_cpds)

            variables = model.nodes()
            for var in variables:
                var_props = self.probnet['Variables'][var].items()
                for prop_name, prop_value in var_props:
                    model.node[var][prop_name] = prop_value

            edges = model.edges()
            for edge in edges:
                edge_props = self.probnet['edges'][edge].items()
                for prop_name, prop_value in edge_props:
                    model.edge[edge[0]][edge[1]][prop_name] = prop_value
            return model
        else:
            raise ValueError("Please specify only Bayesian Network.")
Example #28
    def __init__(self, model):
        self.model = model
        model.check_model()

        if isinstance(model, JunctionTree):
            self.variables = set(chain(*model.nodes()))
        else:
            self.variables = model.nodes()

        self.cardinality = {}
        self.factors = defaultdict(list)

        if isinstance(model, BayesianModel):
            for node in model.nodes():
                cpd = model.get_cpds(node)
                cpd_as_factor = cpd.to_factor()
                self.cardinality[node] = cpd.variable_card

                for var in cpd.variables:
                    self.factors[var].append(cpd_as_factor)

        elif isinstance(model, (MarkovModel, FactorGraph, JunctionTree)):
            self.cardinality = model.get_cardinality()

            for factor in model.get_factors():
                for var in factor.variables:
                    self.factors[var].append(factor)

        elif isinstance(model, DynamicBayesianNetwork):
            self.start_bayesian_model = BayesianModel(model.get_intra_edges(0))
            self.start_bayesian_model.add_cpds(*model.get_cpds(time_slice=0))
            cpd_inter = [
                model.get_cpds(node) for node in model.get_interface_nodes(1)
            ]
            self.interface_nodes = model.get_interface_nodes(0)
            self.one_and_half_model = BayesianModel(model.get_inter_edges() +
                                                    model.get_intra_edges(1))
            self.one_and_half_model.add_cpds(*(model.get_cpds(time_slice=1) +
                                               cpd_inter))
Example #29
    def minimal_imap(self, order):
        """
        Returns a Bayesian model which is a minimal I-map of the joint probability
        distribution, given the specified order of the variables.

        Parameters
        ----------
        order: array-like
            The order of the random variables.

        Examples
        --------
        >>> import numpy as np
        >>> from pgm.factors.discrete import JointProbabilityDistribution
        >>> prob = JointProbabilityDistribution(['x1', 'x2', 'x3'], [2, 3, 2], np.ones(12)/12)
        >>> bayesian_model = prob.minimal_imap(order=['x2', 'x1', 'x3'])
        >>> bayesian_model
        <pgm.models.BayesianModel.BayesianModel object at 0x7fd7440a9320>
        >>> bayesian_model.edges()
        [('x1', 'x3'), ('x2', 'x3')]
        """
        from pgm.models import BayesianModel

        def get_subsets(u):
            for r in range(len(u) + 1):
                for i in itertools.combinations(u, r):
                    yield i

        G = BayesianModel()
        for variable_index in range(len(order)):
            u = order[:variable_index]
            for subset in get_subsets(u):
                if (len(subset) < len(u) and
                        self.check_independence([order[variable_index]], set(u) - set(subset), subset, True)):
                    G.add_edges_from([(variable, order[variable_index]) for variable in subset])
        return G
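
The inner `get_subsets` helper enumerates every subset of the already-ordered prefix `u`. A standalone sketch of that enumeration using only `itertools`:

import itertools

def get_subsets(u):
    # Yield every subset of u, from the empty tuple up to u itself.
    for r in range(len(u) + 1):
        for subset in itertools.combinations(u, r):
            yield subset

print(list(get_subsets(['x2', 'x1'])))
# [(), ('x2',), ('x1',), ('x2', 'x1')]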
Example #30
    def get_model(self):
        """
        Returns the fitted Bayesian model.

        Examples
        --------
        >>> from pgm.readwrite import BIFReader
        >>> reader = BIFReader("bif_test.bif")
        >>> reader.get_model()
        <pgm.models.BayesianModel.BayesianModel object at 0x7f20af154320>
        """
        try:
            model = BayesianModel(self.variable_edges)
            model.name = self.network_name
            model.add_nodes_from(self.variable_names)

            tabular_cpds = []
            for var in sorted(self.variable_cpds.keys()):
                values = self.variable_cpds[var]
                cpd = TabularCPD(
                    var,
                    len(self.variable_states[var]),
                    values,
                    evidence=self.variable_parents[var],
                    evidence_card=[
                        len(self.variable_states[evidence_var])
                        for evidence_var in self.variable_parents[var]
                    ])
                tabular_cpds.append(cpd)

            model.add_cpds(*tabular_cpds)
            for node, properties in self.variable_properties.items():
                for prop in properties:
                    prop_name, prop_value = map(lambda t: t.strip(),
                                                prop.split('='))
                    model.node[node][prop_name] = prop_value

            return model

        except AttributeError:
            raise AttributeError(
                'First get states of variables, edges, parents and network name'
            )
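
The property handling above (and in Examples #1 and #26) splits strings such as 'position = (335, 99)' on '=' and strips the surrounding whitespace. A standalone sketch of that parsing step:

prop = 'position = (335, 99)'
prop_name, prop_value = map(lambda t: t.strip(), prop.split('='))
print(prop_name)   # position
print(prop_value)  # (335, 99)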