def test_factor_reduce_statename(self):
    """Reduce a state-named factor with evidence given by name and by index.

    Each evidence form is exercised twice: once mutating the factor in
    place and once requesting a reduced copy (``inplace=False``). In every
    case only ``switch`` may remain, with cardinality 2 and all-ones values.
    """
    evidence_forms = (
        (('speed', 'medium'), ('time', 'day')),  # states given by name
        (('speed', 1), ('time', 0)),             # states given by index
    )
    for evidence in evidence_forms:
        for in_place in (True, False):
            # A fresh factor and a fresh evidence list for every case.
            factor = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                                    np.ones(12), state_names=self.sn1)
            if in_place:
                factor.reduce(list(evidence))
            else:
                factor = factor.reduce(list(evidence), inplace=False)

            self.assertEqual(factor.variables, ['switch'])
            self.assertEqual(factor.cardinality, [2])
            np_test.assert_array_equal(factor.values, np.array([1, 1]))
 def test_factorset_divide(self):
     """Check ``FactorSet.divide`` with ``inplace=False``.

     The result is expected to hold the dividend's own factors plus, for
     every factor of the divisor, its identity factor divided by it.
     """
     divisor_a = DiscreteFactor(['x1', 'x2', 'x3'], [2, 3, 2], range(1, 13))
     divisor_b = DiscreteFactor(['x3', 'x4', 'x1'], [2, 2, 2], range(1, 9))
     divisor = FactorSet(divisor_a, divisor_b)

     dividend_a = DiscreteFactor(['x5', 'x6', 'x7'], [2, 2, 2], range(1, 9))
     dividend_b = DiscreteFactor(['x5', 'x7', 'x8'], [2, 2, 2], range(1, 9))
     dividend = FactorSet(dividend_a, dividend_b)

     quotient = dividend.divide(divisor, inplace=False)

     expected_factors = {
         dividend_a,
         dividend_b,
         divisor_a.identity_factor() / divisor_a,
         divisor_b.identity_factor() / divisor_b,
     }
     self.assertEqual(expected_factors, quotient.factors)
    def test_max_calibrate_clique_belief(self):
        """Compare max-calibrated clique beliefs with hand-built max-product ones.

        The expected beliefs are assembled from three chain factors
        (A-B, B-C, C-D); ``self.junction_tree`` is presumably built from the
        same factors in the fixture -- confirm against ``setUp``.
        """
        bp = BeliefPropagation(self.junction_tree)
        bp.max_calibrate()
        beliefs = bp.get_clique_beliefs()

        phi_ab = DiscreteFactor(['A', 'B'], [2, 3], range(6))
        phi_bc = DiscreteFactor(['B', 'C'], [3, 2], range(6))
        phi_cd = DiscreteFactor(['C', 'D'], [2, 2], range(4))

        # Messages leaving the two end cliques of the chain.
        max_over_a = phi_ab.maximize(['A'], inplace=False)
        max_over_d = phi_cd.maximize(['D'], inplace=False)

        # Messages that have crossed the middle clique.
        into_ab = (max_over_d * phi_bc).maximize(['C'], inplace=False)
        into_cd = (max_over_a * phi_bc).maximize(['B'], inplace=False)

        expected = (
            (('A', 'B'), phi_ab * into_ab),
            (('B', 'C'), phi_bc * (max_over_a * max_over_d)),
            (('C', 'D'), phi_cd * into_cd),
        )
        for clique, belief in expected:
            np_test.assert_array_almost_equal(beliefs[clique].values,
                                              belief.values)
    def test_max_calibrate_sepset_belief(self):
        """Compare max-calibrated sepset beliefs with hand-built max-marginals.

        Sepsets are looked up by the frozenset of the two cliques they join.
        """
        bp = BeliefPropagation(self.junction_tree)
        bp.max_calibrate()
        beliefs = bp.get_sepset_beliefs()

        phi_ab = DiscreteFactor(['A', 'B'], [2, 3], range(6))
        phi_bc = DiscreteFactor(['B', 'C'], [3, 2], range(6))
        phi_cd = DiscreteFactor(['C', 'D'], [2, 2], range(4))

        max_over_a = phi_ab.maximize(['A'], inplace=False)
        max_over_d = phi_cd.maximize(['D'], inplace=False)

        # Belief over B: the (A, B) clique belief with A maximised out.
        into_ab = (max_over_d * phi_bc).maximize(['C'], inplace=False)
        expected_b = (phi_ab * into_ab).maximize(['A'], inplace=False)

        # Belief over C: the (B, C) clique belief with B maximised out.
        clique_bc = phi_bc * (max_over_a * max_over_d)
        expected_c = clique_bc.maximize(['B'], inplace=False)

        sepset_b = frozenset((('A', 'B'), ('B', 'C')))
        sepset_c = frozenset((('B', 'C'), ('C', 'D')))
        np_test.assert_array_almost_equal(beliefs[sepset_b].values,
                                          expected_b.values)
        np_test.assert_array_almost_equal(beliefs[sepset_c].values,
                                          expected_c.values)
    def setUp(self):
        """Build paired fixtures with and without state names.

        Creates two factors (``phi1``/``phi2``), two CPDs (``cpd1``/``cpd2``)
        and two ``VariableElimination`` engines (``model1``/``model2``); the
        second of each pair is given a state-name mapping.
        """
        def grade_rows(n_columns):
            # One row per grade state; every column holds (0.1, 0.1, 0.8).
            return [[0.1] * n_columns, [0.1] * n_columns, [0.8] * n_columns]

        self.sn1 = {'speed': ['low', 'medium', 'high'],
                    'switch': ['on', 'off'],
                    'time': ['day', 'night']}
        self.sn2 = {'grade': ['A', 'B', 'F'],
                    'diff': ['high', 'low'],
                    'intel': ['poor', 'good', 'very good']}

        # Factors over speed/switch/time, without and with state names.
        self.phi1 = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                                   np.ones(12))
        self.phi2 = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                                   np.ones(12), state_names=self.sn1)

        # CPDs for grade given diff and intel, without and with state names.
        self.cpd1 = TabularCPD('grade', 3, grade_rows(6),
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.cpd2 = TabularCPD('grade', 3, grade_rows(6),
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3],
                               state_names=self.sn2)

        # Small Bayesian network shared by both inference engines.
        student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        student.add_cpds(
            TabularCPD('diff', 2, [[0.2, 0.8]]),
            TabularCPD('intel', 2, [[0.3, 0.7]]),
            TabularCPD('grade', 3, grade_rows(4),
                       evidence=['diff', 'intel'],
                       evidence_card=[2, 2]))
        self.model1 = VariableElimination(student)
        self.model2 = VariableElimination(student, state_names=self.sn2)
Пример #6
0
    def test_calibrate_sepset_belief(self):
        """Compare calibrated sepset beliefs with hand-built marginals.

        Sepsets are looked up by the frozenset of the two cliques they join.
        """
        bp = BeliefPropagation(self.junction_tree)
        bp.calibrate()
        beliefs = bp.get_sepset_beliefs()

        phi_ab = DiscreteFactor(['A', 'B'], [2, 3], range(6))
        phi_bc = DiscreteFactor(['B', 'C'], [3, 2], range(6))
        phi_cd = DiscreteFactor(['C', 'D'], [2, 2], range(4))

        sum_over_a = phi_ab.marginalize(['A'], inplace=False)
        sum_over_d = phi_cd.marginalize(['D'], inplace=False)

        # Belief over B: the (A, B) clique belief with A summed out.
        into_ab = (sum_over_d * phi_bc).marginalize(['C'], inplace=False)
        expected_b = (phi_ab * into_ab).marginalize(['A'], inplace=False)

        # Belief over C: the (B, C) clique belief with B summed out.
        clique_bc = phi_bc * (sum_over_a * sum_over_d)
        expected_c = clique_bc.marginalize(['B'], inplace=False)

        sepset_b = frozenset((('A', 'B'), ('B', 'C')))
        sepset_c = frozenset((('B', 'C'), ('C', 'D')))
        np_test.assert_array_almost_equal(beliefs[sepset_b].values,
                                          expected_b.values)
        np_test.assert_array_almost_equal(beliefs[sepset_c].values,
                                          expected_c.values)
    def test_check_model3(self):
        """``check_model`` must raise ``ValueError`` for each factor batch.

        Every batch is added to the graph, checked, and removed again so the
        next batch starts from a graph without factors.
        """
        self.graph.add_edges_from([('a', 'b'), ('b', 'c'), ('c', 'd'),
                                   ('d', 'a')])

        # Each entry: (scope, cardinality, number of random values).
        batches = (
            [(['a', 'c'], [1, 2], 2)],
            [(['a', 'b'], [1, 2], 2),
             (['a', 'c'], [1, 2], 2)],
            [(['a', 'b'], [1, 2], 2),
             (['b', 'c'], [2, 3], 6),
             (['c', 'd'], [3, 4], 12),
             (['d', 'a'], [4, 1], 4),
             (['d', 'b'], [4, 2], 8)],
        )
        for batch in batches:
            factors = [DiscreteFactor(scope, card, np.random.rand(size))
                       for scope, card, size in batch]
            self.graph.add_factors(*factors)
            with self.assertRaises(ValueError):
                self.graph.check_model()
            self.graph.remove_factors(*factors)
Пример #8
0
 def test_markov_inference_init(self):
     """Check what ``Inference`` exposes for the Markov fixture.

     Verifies the variable set, the per-variable cardinality and the
     mapping from each variable to the list of factors that mention it.
     """
     def pairwise(first, second, values):
         # Fresh binary-binary factor so no instance is shared between lists.
         return DiscreteFactor([first, second], [2, 2], np.array(values))

     def f_ab():
         return pairwise('a', 'b', [100, 1, 1, 100])

     def f_ac():
         return pairwise('a', 'c', [40, 30, 100, 20])

     def f_bd():
         return pairwise('b', 'd', [1, 100, 100, 1])

     def f_cd():
         return pairwise('c', 'd', [60, 60, 40, 40])

     infer_markov = Inference(self.markov)

     self.assertEqual(set(infer_markov.variables), {'a', 'b', 'c', 'd'})
     self.assertEqual(infer_markov.cardinality,
                      {'a': 2, 'b': 2, 'c': 2, 'd': 2})

     expected_factors = {
         'a': [f_ab(), f_ac()],
         'b': [f_ab(), f_bd()],
         'c': [f_ac(), f_cd()],
         'd': [f_bd(), f_cd()],
     }
     self.assertEqual(infer_markov.factors, expected_factors)
Пример #9
0
        def __init__(self, intersection_set_variables, cluster_potential):
            """
            Set up a cluster from its potential and the candidate intersection sets.

            Stores the potential's scope as a frozenset, keeps a deep copy of
            the potential, records every non-empty overlap between a candidate
            intersection set and the scope, and creates one all-zero message
            factor per overlap.
            """
            # The variables this cluster is made of.
            self.cluster_variables = frozenset(cluster_potential.scope())

            # Private copy of the potential supplied by the caller.
            self.cluster_potential = copy.deepcopy(cluster_potential)

            # S(c): non-empty intersections of the candidates with this cluster.
            overlaps = []
            for candidate in intersection_set_variables:
                shared = candidate.intersection(self.cluster_variables)
                if shared:
                    overlaps.append(shared)
            self.intersection_sets_for_cluster_c = overlaps

            # Messages from this cluster to each of its intersection sets,
            # initialised to zero: \lambda_{c \rightarrow \s} = 0
            self.message_from_cluster = {}
            for shared in overlaps:
                # May be a single node or an edge, e.g. ['A'] or ['A', 'C'].
                names = list(shared)

                # Cardinalities listed in the same order as `names`.
                cardinality_of = cluster_potential.get_cardinality(names)
                cards = [cardinality_of[name] for name in names]

                # Blank (all-zero) factor over the shared variables.
                self.message_from_cluster[shared] = DiscreteFactor(
                    names, cards, np.zeros(np.prod(cards)))
Пример #10
0
            cpd2.append(0)

# std = 0.25
# cpd1 = [mlab.normpdf(0, 0, std), mlab.normpdf(0, 1, std), mlab.normpdf(0, 2, std), mlab.normpdf(0, 3, std),
#         mlab.normpdf(1, 0, std), mlab.normpdf(1, 1, std), mlab.normpdf(1, 2, std), mlab.normpdf(1, 3, std),
#         mlab.normpdf(2, 0, std), mlab.normpdf(2, 1, std), mlab.normpdf(2, 2, std), mlab.normpdf(2, 3, std),
#         mlab.normpdf(3, 0, std), mlab.normpdf(3, 1, std), mlab.normpdf(3, 2, std), mlab.normpdf(3, 3, std)]

# cpd2 = [mlab.normpdf(0, 0, std), mlab.normpdf(0, 1, std), mlab.normpdf(0, 2, std),
#         mlab.normpdf(1, 0, std), mlab.normpdf(1, 1, std), mlab.normpdf(1, 2, std),
#         mlab.normpdf(2, 0, std), mlab.normpdf(2, 1, std), mlab.normpdf(2, 2, std)]

# Pairwise factors linking each attribute to its "<attribute>_received"
# counterpart. Per the original author's note, the table models
# p(doors_received | doors): the attribute is the original information and
# the "_received" variable is a noisy version of it.
# The author also notes that the tables need not be normalised for a Markov
# network.
# cpd1 supplies the values of the 4x4 factors and cpd2 those of the 3x3 ones;
# both are built earlier in the script (not fully visible in this excerpt).
factor1 = DiscreteFactor(['buying', 'buying_received'],
                         cardinality=[4, 4],
                         values=cpd1)
factor2 = DiscreteFactor(['maint', 'maint_received'],
                         cardinality=[4, 4],
                         values=cpd1)
factor3 = DiscreteFactor(['doors', 'doors_received'],
                         cardinality=[4, 4],
                         values=cpd1)
factor4 = DiscreteFactor(['persons', 'persons_received'],
                         cardinality=[3, 3],
                         values=cpd2)
factor5 = DiscreteFactor(['safety', 'safety_received'],
                         cardinality=[3, 3],
                         values=cpd2)
factor6 = DiscreteFactor(['lug_boot', 'lug_boot_received'],
                         cardinality=[3, 3],
Пример #11
0
# THIS CODE HAS TO BE RUN ON PYTHON 2
# Otherwise, you will get wrong results

from pgmpy.models import MarkovModel
from pgmpy.factors.discrete import DiscreteFactor
from pgmpy.inference import BeliefPropagation
import numpy as np

# Construct a chain-shaped Markov network: w1 - w2 - w3
PGM = MarkovModel()
PGM.add_nodes_from(['w1', 'w2', 'w3'])
PGM.add_edges_from([('w1', 'w2'), ('w2', 'w3')])

# Pairwise potential shared by both edges. The 3x3 table is written row by
# row and transposed before flattening, which yields
# [1, 10, 3, 2, 1, 5, 3, 3, 2]. (An earlier, immediately overwritten
# assignment of that same flat array was removed as dead code.)
tr_matrix = np.array([1, 2, 3, 10, 1, 3, 3, 5, 2]).reshape(3, 3).T.reshape(-1)
phi = [DiscreteFactor(edge, [3, 3], tr_matrix) for edge in PGM.edges()]
print(phi[0])
print(phi[1])
PGM.add_factors(*phi)

# Calculate the partition function
Z = PGM.get_partition_function()
print('The partition function is:', Z)

# Calibrate the clique tree
belief_propagation = BeliefPropagation(PGM)
belief_propagation.calibrate()

# Output the calibration result; the query result is multiplied by Z before
# printing.
query = belief_propagation.query(variables=['w2'])
print('After calibration you should get the following mu(S):\n', query * Z)
Пример #12
0
# NOTE(review): `data` is defined outside this excerpt; presumably a pandas
# DataFrame -- confirm. Bare expressions such as the `unique()` and
# `groupby(...).size()` calls below discard their result when run as a script;
# they look like leftovers from interactive use.
data.previous.unique()
# Store the 'previous' column as a categorical.
data['previous'] = data['previous'].astype('category')
data.previous.unique()
data.groupby(["education", "loan"]).size()

#---------------Construct Markov Model based on intuition---------------------

# Edges of the hand-specified Markov network.
mark = MarkovModel([('education', 'loan'), ('education', 'housing'),
                    ('loan', 'y'), ('housing', 'y'), ('marital', 'y'),
                    ('default', 'y'), ('contact', 'month'), ('month', 'y'),
                    ('poutcome', 'y')])

#----------------Generate factors in the Markov model and add them------------
# NOTE(review): the hard-coded `values` below are presumably the counts shown
# by the preceding groupby(...).size() call for the same pair of columns --
# verify against the data before relying on them.
data.groupby(["education", "loan"]).size()
f1 = DiscreteFactor(["education", "loan"],
                    cardinality=[4, 2],
                    values=(584, 94, 1890, 416, 1176, 174, 180, 7))
data.groupby(["education", "housing"]).size()
f2 = DiscreteFactor(["education", "housing"],
                    cardinality=[4, 2],
                    values=(295, 383, 876, 1430, 687, 663, 104, 83))
data.groupby(["loan", "y"]).size()
f3 = DiscreteFactor(["loan", "y"],
                    cardinality=[2, 2],
                    values=(3352, 478, 648, 43))
data.groupby(["housing", "y"]).size()
f4 = DiscreteFactor(["housing", "y"],
                    cardinality=[2, 2],
                    values=(1661, 301, 2339, 220))
data.groupby(["marital", "y"]).size()
f5 = DiscreteFactor(["marital", "y"],
Пример #13
0
 def test_add_single_factor(self):
     """A single factor added to the graph shows up in ``graph.factors``."""
     self.graph.add_nodes_from(["a", "b", "c"])

     factor_ab = DiscreteFactor(["a", "b"], [2, 2], range(4))
     self.graph.add_factors(factor_ab)

     expected = [factor_ab]
     six.assertCountEqual(self, self.graph.factors, expected)
Пример #14
0
 def test_partition_function_raises_error(self):
     """``get_partition_function`` must raise ``ValueError`` for this setup.

     Four nodes are added but the two factors only mention a, b and c.
     """
     self.graph.add_nodes_from(["a", "b", "c", "d"])

     factor_ab = DiscreteFactor(["a", "b"], [2, 2], range(4))
     factor_bc = DiscreteFactor(["b", "c"], [2, 2], range(4))
     self.graph.add_factors(factor_ab, factor_bc)

     with self.assertRaises(ValueError):
         self.graph.get_partition_function()
Пример #15
0
def eval_partition_func_random_glass_spin(N):
    '''
    Build a random N x N spin glass model and print its partition function
    as computed by get_partition_function_BP.

    Every node gets a single-variable factor [exp(-theta_i), exp(theta_i)]
    with theta_i drawn uniformly from [-f, f). Every grid edge gets a 2x2
    factor with exp(theta_ij) on the diagonal and exp(-theta_ij) off it,
    with theta_ij drawn uniformly from [0, theta_ij_max).

    Inputs:
        -N: int, generate a random NxN glass spin model

    Outputs:
        None. The node names, the edge list, the number of edges and the
        partition function are printed.
    '''
    G = MarkovModel()

    #create an NxN grid of nodes
    #NOTE(review): 'x%d%d' is ambiguous once N >= 12 (cells (1, 11) and
    #(11, 1) both map to 'x111'), so distinct cells would share one node.
    node_names = ['x%d%d' % (r, c) for r in range(N) for c in range(N)]
    #print calls take a single argument so that the output is the same on
    #Python 2 and Python 3
    print(node_names)
    G.add_nodes_from(node_names)

    #add an edge between each node and its 4 neighbors, except when the
    #node is on the grid border and has fewer than 4 neighbors; each edge is
    #added once, from a node to the node below it and to the node right of it
    edges = []
    for r in range(N):
        for c in range(N):
            here = 'x%d%d' % (r, c)
            if r < N - 1:
                edges.append((here, 'x%d%d' % (r + 1, c)))
            if c < N - 1:
                edges.append((here, 'x%d%d' % (r, c + 1)))
    assert (len(edges) == 2 * N * (N - 1))
    print(edges)
    print("number edges = %d" % len(edges))
    G.add_edges_from(edges)

    all_factors = []
    #sample single variable potentials
    STRONG_LOCAL_FIELD = True
    #1 is the strong local field, .1 the weak one
    f = 1 if STRONG_LOCAL_FIELD else .1
    for node in node_names:
        #sample in the half open interval [-f, f), gumbel paper actually uses closed interval, shouldn't matter
        theta_i = np.random.uniform(low=-f, high=f)
        factor_vals = np.array([np.exp(-theta_i), np.exp(theta_i)])
        all_factors.append(
            DiscreteFactor([node], cardinality=[2], values=factor_vals))

    #sample two variable potentials
    theta_ij_max = 1.5
    for edge in edges:
        #sample in the half open interval [0, theta_ij_max)
        theta_ij = np.random.uniform(low=0.0, high=theta_ij_max)
        factor_vals = np.array([[np.exp(theta_ij),
                                 np.exp(-theta_ij)],
                                [np.exp(-theta_ij),
                                 np.exp(theta_ij)]])
        all_factors.append(
            DiscreteFactor(edge, cardinality=[2, 2], values=factor_vals))

    G.add_factors(*all_factors)
    #G.get_partition_function() was previously used here (commented out) as
    #an enumeration-based value to compare against
    partition_function_bp = get_partition_function_BP(G)
    print("partition function bp = %s" % (partition_function_bp,))
Пример #16
0
    def test_query_multiple_times(self):
        """Run every query twice and check the result of the second run.

        This just tests that the models are not getting modified while
        querying them.
        """
        engine = self.bayesian_inference

        # Marginal of a single variable.
        for _ in range(2):
            query_result = engine.query(["J"])
        expected = DiscreteFactor(variables=["J"],
                                  cardinality=[2],
                                  values=np.array([0.416, 0.584]))
        self.assertEqual(query_result, expected)

        # Joint marginal of two variables.
        for _ in range(2):
            query_result = engine.query(["Q", "J"])
        expected = DiscreteFactor(
            variables=["J", "Q"],
            cardinality=[2, 2],
            values=np.array([[0.3744, 0.0416], [0.1168, 0.4672]]),
        )
        self.assertEqual(query_result, expected)

        # Single variable with evidence.
        for _ in range(2):
            query_result = engine.query(variables=["J"],
                                        evidence={"A": 0, "R": 1})
        expected = DiscreteFactor(variables=["J"], cardinality=[2],
                                  values=[0.6, 0.4])
        self.assertEqual(query_result, expected)

        # Two variables with evidence.
        for _ in range(2):
            query_result = engine.query(variables=["J", "Q"],
                                        evidence={"A": 0, "R": 0,
                                                  "G": 0, "L": 1})
        expected = DiscreteFactor(
            variables=["J", "Q"],
            cardinality=[2, 2],
            values=np.array([[0.73636364, 0.08181818],
                             [0.03636364, 0.14545455]]),
        )
        self.assertEqual(query_result, expected)
Пример #17
0
 def test_get_partition_function(self):
     """The partition function of two linked clique factors is 22.0."""
     clique_ab, clique_bc = ('a', 'b'), ('b', 'c')
     # One edge whose end points are the two clique nodes.
     self.graph.add_edges_from([[clique_ab, clique_bc]])

     factor_ab = DiscreteFactor(list(clique_ab), [2, 2], range(4))
     factor_bc = DiscreteFactor(list(clique_bc), [2, 2], range(4))
     self.graph.add_factors(factor_ab, factor_bc)

     partition = self.graph.get_partition_function()
     self.assertEqual(partition, 22.0)
Пример #18
0
import numpy as np
from pgmpy.models import FactorGraph
from pgmpy.factors.discrete import DiscreteFactor
from pgmpy.inference import BeliefPropagation

# Factor graph over three variable nodes with one random pairwise factor for
# every pair of variables.
G = FactorGraph()
G.add_nodes_from([0, 1, 2])

f01 = DiscreteFactor([0, 1], [2, 2], np.random.rand(4))
f02 = DiscreteFactor([0, 2], [2, 2], np.random.rand(4))
f12 = DiscreteFactor([1, 2], [2, 2], np.random.rand(4))
G.add_factors(f01, f02, f12)

# Connect every factor node to the two variables in its scope.
G.add_edges_from([
    (0, f01), (1, f01),
    (0, f02), (2, f02),
    (1, f12), (2, f12),
])

# Run belief propagation on the factor graph.
bp = BeliefPropagation(G)
bp.calibrate()
Пример #19
0
 def test_add_multiple_factors(self):
     """Several factors added in one call all show up in ``graph.factors``."""
     # One edge whose end points are the clique nodes ('a', 'b') and ('b', 'c').
     self.graph.add_edges_from([[('a', 'b'), ('b', 'c')]])

     factor_ab = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4))
     factor_bc = DiscreteFactor(['b', 'c'], [2, 2], np.random.rand(4))
     self.graph.add_factors(factor_ab, factor_bc)

     expected = [factor_ab, factor_bc]
     six.assertCountEqual(self, self.graph.factors, expected)
Пример #20
0
 def test_add_single_factor_raises_error(self):
     """``add_factors`` must raise ``ValueError`` for this factor.

     The graph only holds the node ('a', 'b'); the factor is over b and c.
     """
     self.graph.add_node(('a', 'b'))
     factor_bc = DiscreteFactor(['b', 'c'], [2, 2], np.random.rand(4))

     with self.assertRaises(ValueError):
         self.graph.add_factors(factor_bc)
Пример #21
0
 def test_add_single_factor(self):
     """A factor over the variables of an existing clique node is stored."""
     self.graph.add_node(('a', 'b'))

     factor_ab = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4))
     self.graph.add_factors(factor_ab)

     expected = [factor_ab]
     six.assertCountEqual(self, self.graph.factors, expected)
    def test_copy(self):
        """Check that ``graph.copy()`` is independent of the original graph.

        The steps run in order: copy an edge-only graph, mutate the original,
        add a factor, re-copy, mutate the factor, re-copy with an isolated
        node, then mutate the copy. After each step the test asserts which
        nodes, edges and factors each side holds.
        """
        # Setup the original graph
        self.graph.add_nodes_from(['a', 'b'])
        self.graph.add_edges_from([('a', 'b')])

        # Generate the copy
        # NOTE(review): the local name `copy` would shadow the stdlib `copy`
        # module inside this method if the file imports it.
        copy = self.graph.copy()

        # Ensure the copied model is correct
        self.assertTrue(copy.check_model())

        # Basic sanity checks to ensure the graph was copied correctly
        # (assertListEqual assumes neighbors() returns a list -- TODO confirm
        # for the networkx version in use)
        self.assertEqual(len(copy.nodes()), 2)
        self.assertListEqual(copy.neighbors('a'), ['b'])
        self.assertListEqual(copy.neighbors('b'), ['a'])

        # Modify the original graph ...
        self.graph.add_nodes_from(['c'])
        self.graph.add_edges_from([('c', 'b')])

        # ... and ensure none of those changes get propagated
        self.assertEqual(len(copy.nodes()), 2)
        self.assertListEqual(copy.neighbors('a'), ['b'])
        self.assertListEqual(copy.neighbors('b'), ['a'])
        with self.assertRaises(nx.NetworkXError):
            copy.neighbors('c')

        # Ensure the copy has no factors at this point
        self.assertEqual(len(copy.get_factors()), 0)

        # Add factors to the original graph
        phi1 = DiscreteFactor(['a', 'b'], [2, 2], [[0.3, 0.7], [0.9, 0.1]])
        self.graph.add_factors(phi1)

        # The factors should not get copied over: the two factor lists must
        # differ, so the inner assertListEqual is expected to fail
        with self.assertRaises(AssertionError):
            self.assertListEqual(copy.get_factors(), self.graph.get_factors())

        # Create a fresh copy
        del copy
        copy = self.graph.copy()
        self.assertListEqual(copy.get_factors(), self.graph.get_factors())

        # If we change factors in the original, it should not be passed to the clone
        phi1.values = np.array([[0.5, 0.5], [0.5, 0.5]])
        self.assertNotEqual(self.graph.get_factors(), copy.get_factors())

        # Start with a fresh copy
        del copy
        self.graph.add_nodes_from(['d'])
        copy = self.graph.copy()

        # Ensure an unconnected node gets copied over as well
        self.assertEqual(len(copy.nodes()), 4)
        # NOTE(review): the five neighbor checks below inspect self.graph, not
        # the copy; confirm whether copy.neighbors(...) was intended here.
        self.assertListEqual(self.graph.neighbors('a'), ['b'])
        self.assertTrue('a' in self.graph.neighbors('b'))
        self.assertTrue('c' in self.graph.neighbors('b'))
        self.assertListEqual(self.graph.neighbors('c'), ['b'])
        self.assertListEqual(self.graph.neighbors('d'), [])

        # Verify that changing the copied model should not update the original
        copy.add_nodes_from(['e'])
        self.assertListEqual(copy.neighbors('e'), [])
        with self.assertRaises(nx.NetworkXError):
            self.graph.neighbors('e')

        # Verify that changing edges in the copy doesn't create edges in the original
        copy.add_edges_from([('d', 'b')])

        self.assertTrue('a' in copy.neighbors('b'))
        self.assertTrue('c' in copy.neighbors('b'))
        self.assertTrue('d' in copy.neighbors('b'))

        self.assertTrue('a' in self.graph.neighbors('b'))
        self.assertTrue('c' in self.graph.neighbors('b'))
        self.assertFalse('d' in self.graph.neighbors('b'))

        # If we remove factors from the copied model, it should not reflect in the original
        copy.remove_factors(phi1)
        self.assertEqual(len(self.graph.get_factors()), 1)
        self.assertEqual(len(copy.get_factors()), 0)
class StateNameDecorator(unittest.TestCase):
    """
    Tests that factors, CPDs and inference queries can be addressed either
    by state name or by integer state index.
    """

    def setUp(self):
        """Create factors, CPDs and inference engines with and without state names."""
        self.sn2 = {'grade': ['A', 'B', 'F'],
                    'diff': ['high', 'low'],
                    'intel': ['poor', 'good', 'very good']}
        self.sn1 = {'speed': ['low', 'medium', 'high'],
                    'switch': ['on', 'off'],
                    'time': ['day', 'night']}

        self.phi1 = DiscreteFactor(['speed', 'switch', 'time'],
                                   [3, 2, 2], np.ones(12))
        self.phi2 = self._speed_factor()

        self.cpd1 = TabularCPD('grade', 3, self._grade_table(6),
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.cpd2 = self._grade_cpd()

        student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
        intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
        grade_cpd = TabularCPD('grade', 3, self._grade_table(4),
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 2])
        student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.model1 = VariableElimination(student)
        self.model2 = VariableElimination(student, state_names=self.sn2)

    @staticmethod
    def _grade_table(n_columns):
        """Return a fresh 3 x ``n_columns`` table with rows of 0.1, 0.1 and 0.8."""
        return [[0.1] * n_columns, [0.1] * n_columns, [0.8] * n_columns]

    def _speed_factor(self):
        """Return a new all-ones speed/switch/time factor using ``self.sn1``."""
        return DiscreteFactor(['speed', 'switch', 'time'],
                              [3, 2, 2], np.ones(12), state_names=self.sn1)

    def _grade_cpd(self):
        """Return a new grade CPD with diff/intel evidence using ``self.sn2``."""
        return TabularCPD('grade', 3, self._grade_table(6),
                          evidence=['diff', 'intel'], evidence_card=[2, 3],
                          state_names=self.sn2)

    def _assert_switch_only(self, phi):
        """Assert that ``phi`` is an all-ones factor over 'switch' alone."""
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

    def test_assignment_statename(self):
        """assignment() reports names only for the factor built with state names."""
        req_op1 = [[('speed', 'low'), ('switch', 'on'), ('time', 'night')],
                   [('speed', 'low'), ('switch', 'off'), ('time', 'day')]]
        req_op2 = [[('speed', 0), ('switch', 0), ('time', 1)],
                   [('speed', 0), ('switch', 1), ('time', 0)]]

        self.assertEqual(self.phi1.assignment([1, 2]), req_op2)
        self.assertEqual(self.phi2.assignment([1, 2]), req_op1)

    def test_factor_reduce_statename(self):
        """reduce() accepts state names and indices, in place or not."""
        by_name = [('speed', 'medium'), ('time', 'day')]
        by_index = [('speed', 1), ('time', 0)]

        for evidence in (by_name, by_index):
            # Default call: the result is checked on the factor itself.
            phi = self._speed_factor()
            phi.reduce(list(evidence))
            self._assert_switch_only(phi)

            # inplace=False: the result is checked on the returned factor.
            phi = self._speed_factor()
            phi = phi.reduce(list(evidence), inplace=False)
            self._assert_switch_only(phi)

    def test_reduce_cpd_statename(self):
        """TabularCPD.reduce() accepts state names and indices, in place or not."""
        expected = np.array(self._grade_table(3))

        for inplace in (True, False):
            for evidence in ([('diff', 'high')], [('diff', 0)]):
                cpd = self._grade_cpd()
                if inplace:
                    cpd.reduce(evidence)
                else:
                    cpd = cpd.reduce(evidence, inplace=False)
                self.assertEqual(cpd.variable, 'grade')
                self.assertEqual(cpd.variables, ['grade', 'intel'])
                np_test.assert_array_equal(cpd.get_values(), expected)

    def test_inference_query_statename(self):
        """Evidence given by name and by index must produce the same answers."""
        inf_op1 = self.model2.query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.query(['grade'], evidence={'intel': 0})
        req_op = {'grade': DiscreteFactor(['grade'], [3], np.array([0.1, 0.1, 0.8]))}

        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        # Previously inf_op1 was compared twice and inf_op2 never was.
        self.assertEqual(inf_op2, req_op)

        inf_op1 = self.model2.map_query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.map_query(['grade'], evidence={'intel': 0})
        req_op = {'grade': 'F'}

        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        # Previously inf_op1 was compared twice and inf_op2 never was.
        self.assertEqual(inf_op2, req_op)
 def test_add_factor_raises_error(self):
     """add_factors must raise ValueError for the factor built below."""
     cycle_edges = [('Alice', 'Bob'), ('Bob', 'Charles'),
                    ('Charles', 'Debbie'), ('Debbie', 'Alice')]
     self.graph.add_edges_from(cycle_edges)

     # 'John' is not among the nodes introduced by the edges above.
     bad_factor = DiscreteFactor(['Alice', 'Bob', 'John'], [2, 2, 2],
                                 np.random.rand(8))
     with self.assertRaises(ValueError):
         self.graph.add_factors(bad_factor)
 def setUp(self):
     """Create the four three-variable factors phi1..phi4 used as fixtures."""
     # (attribute name, scope, cardinality, values)
     fixtures = (
         ('phi1', ['x1', 'x2', 'x3'], [2, 3, 2], range(12)),
         ('phi2', ['x3', 'x4', 'x1'], [2, 2, 2], range(8)),
         ('phi3', ['x5', 'x6', 'x7'], [2, 2, 2], range(8)),
         ('phi4', ['x5', 'x7', 'x8'], [2, 2, 2], range(8)),
     )
     for attr, scope, card, values in fixtures:
         setattr(self, attr, DiscreteFactor(scope, card, values))
    def to_junction_tree(self):
        """
        Creates a junction tree (or clique tree) for a given markov model.

        For a given markov model (H) a junction tree (G) is a graph
        1. where each node in G corresponds to a maximal clique in H
        2. each sepset in G separates the variables strictly on one side of the
        edge to other.

        Returns
        -------
        JunctionTree
            Tree whose nodes are the maximal cliques of the triangulated
            model, with one clique potential added per node.

        Raises
        ------
        ValueError
            If the scopes of the model's factors do not cover every node, or
            if some factor could not be assigned to any clique.

        Examples
        --------
        >>> import numpy as np
        >>> from pgmpy.models import MarkovModel
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> mm = MarkovModel()
        >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                    ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
        >>> mm.add_factors(*phi)
        >>> junction_tree = mm.to_junction_tree()
        """
        from pgmpy.models import JunctionTree

        # Check whether the model is valid or not
        self.check_model()

        # Triangulate the graph to make it chordal
        triangulated_graph = self.triangulate()

        # Find maximal cliques in the chordal graph
        cliques = list(map(tuple, nx.find_cliques(triangulated_graph)))

        # If there is only 1 clique, then the junction tree formed is just a
        # clique tree with that single clique as the node
        if len(cliques) == 1:
            clique_trees = JunctionTree()
            clique_trees.add_node(cliques[0])

        # Else if the number of cliques is more than 1 then create a complete
        # graph with all the cliques as nodes. Each edge is weighted with the
        # negated sepset size, so that the minimum spanning tree below is the
        # tree with the largest total sepset size.
        elif len(cliques) >= 2:
            complete_graph = UndirectedGraph()
            for first, second in itertools.combinations(cliques, 2):
                sepset_size = len(set(first).intersection(set(second)))
                complete_graph.add_edge(first, second, weight=-sepset_size)

            clique_trees = JunctionTree(
                nx.minimum_spanning_tree(complete_graph).edges())

        # Check whether the factors are defined for all the random variables or not
        all_vars = itertools.chain(
            *[factor.scope() for factor in self.factors])
        if set(all_vars) != set(self.nodes()):
            # The exception used to be constructed but never raised, which
            # made this check a no-op.
            raise ValueError(
                "DiscreteFactor for all the random variables not specified")

        # Dictionary stating whether the factor is used to create clique
        # potential or not
        # If false, then it is not used to create any clique potential
        is_used = {factor: False for factor in self.factors}

        # Looked up once; the model is not modified inside the loop below.
        cardinalities = self.get_cardinality()

        for node in clique_trees.nodes():
            clique_factors = []
            for factor in self.factors:
                # A factor is assigned to the first clique that contains its
                # whole scope, and is never assigned twice
                if not is_used[factor] and set(factor.scope()).issubset(node):
                    clique_factors.append(factor)
                    is_used[factor] = True

            # To compute clique potential, initially set it as unity factor
            var_card = [cardinalities[x] for x in node]
            clique_potential = DiscreteFactor(node, var_card,
                                              np.ones(np.prod(var_card)))
            # multiply it with the factors associated with the variables present
            # in the clique (or node)
            # Checking if there's clique_factors, to handle the case when clique_factors
            # is empty, otherwise factor_product will throw an error [ref #889]
            if clique_factors:
                clique_potential *= factor_product(*clique_factors)
            clique_trees.add_factors(clique_potential)

        if not all(is_used.values()):
            raise ValueError(
                "All the factors were not used to create Junction Tree."
                "Extra factors are defined.")

        return clique_trees
Пример #27
0
class StateNameDecorator(unittest.TestCase):
    """
    Tests that factors, CPDs and inference queries can be addressed either
    by state name or by integer state index.
    """

    def setUp(self):
        """Create factors, CPDs and inference engines with and without state names."""
        self.sn2 = {
            'grade': ['A', 'B', 'F'],
            'diff': ['high', 'low'],
            'intel': ['poor', 'good', 'very good']
        }
        self.sn1 = {
            'speed': ['low', 'medium', 'high'],
            'switch': ['on', 'off'],
            'time': ['day', 'night']
        }

        self.phi1 = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                                   np.ones(12))
        self.phi2 = self._named_factor()

        self.cpd1 = TabularCPD(
            'grade',
            3,
            self._rows(6),
            evidence=['diff', 'intel'],
            evidence_card=[2, 3])
        self.cpd2 = self._named_cpd()

        student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
        intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
        grade_cpd = TabularCPD(
            'grade',
            3,
            self._rows(4),
            evidence=['diff', 'intel'],
            evidence_card=[2, 2])
        student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.model1 = VariableElimination(student)
        self.model2 = VariableElimination(student, state_names=self.sn2)

    @staticmethod
    def _rows(width):
        """Return a fresh 3 x ``width`` table with rows of 0.1, 0.1 and 0.8."""
        return [[0.1] * width, [0.1] * width, [0.8] * width]

    def _named_factor(self):
        """Return a new all-ones speed/switch/time factor using ``self.sn1``."""
        return DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                              np.ones(12),
                              state_names=self.sn1)

    def _named_cpd(self):
        """Return a new grade CPD with diff/intel evidence using ``self.sn2``."""
        return TabularCPD(
            'grade',
            3,
            self._rows(6),
            evidence=['diff', 'intel'],
            evidence_card=[2, 3],
            state_names=self.sn2)

    def _check_reduced_factor(self, phi):
        """Assert that ``phi`` is an all-ones factor over 'switch' alone."""
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

    def _check_reduced_cpd(self, cpd):
        """Assert that ``cpd`` is the grade CPD conditioned on 'intel' only."""
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(cpd.get_values(), np.array(self._rows(3)))

    def test_assignment_statename(self):
        """assignment() reports names only for the factor built with state names."""
        req_op1 = [[('speed', 'low'), ('switch', 'on'), ('time', 'night')],
                   [('speed', 'low'), ('switch', 'off'), ('time', 'day')]]
        req_op2 = [[('speed', 0), ('switch', 0), ('time', 1)],
                   [('speed', 0), ('switch', 1), ('time', 0)]]

        self.assertEqual(self.phi1.assignment([1, 2]), req_op2)
        self.assertEqual(self.phi2.assignment([1, 2]), req_op1)

    def test_factor_reduce_statename(self):
        """reduce() accepts state names and indices, in place or not."""
        evidences = (
            [('speed', 'medium'), ('time', 'day')],
            [('speed', 1), ('time', 0)],
        )
        for evidence in evidences:
            # Default call: the result is checked on the factor itself.
            phi = self._named_factor()
            phi.reduce(list(evidence))
            self._check_reduced_factor(phi)

            # inplace=False: the result is checked on the returned factor.
            phi = self._named_factor()
            phi = phi.reduce(list(evidence), inplace=False)
            self._check_reduced_factor(phi)

    def test_reduce_cpd_statename(self):
        """TabularCPD.reduce() accepts state names and indices, in place or not."""
        # Default call: the result is checked on the CPD itself.
        for evidence in ([('diff', 'high')], [('diff', 0)]):
            cpd = self._named_cpd()
            cpd.reduce(evidence)
            self._check_reduced_cpd(cpd)

        # inplace=False: the result is checked on the returned CPD.
        for evidence in ([('diff', 'high')], [('diff', 0)]):
            cpd = self._named_cpd()
            cpd = cpd.reduce(evidence, inplace=False)
            self._check_reduced_cpd(cpd)

    def test_inference_query_statename(self):
        """Evidence given by name and by index must produce the same answers."""
        inf_op1 = self.model2.query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.query(['grade'], evidence={'intel': 0})
        req_op = {
            'grade': DiscreteFactor(['grade'], [3], np.array([0.1, 0.1, 0.8]))
        }

        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        # Previously inf_op1 was compared twice and inf_op2 never was.
        self.assertEqual(inf_op2, req_op)

        inf_op1 = self.model2.map_query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.map_query(['grade'], evidence={'intel': 0})
        req_op = {'grade': 'F'}

        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        # Previously inf_op1 was compared twice and inf_op2 never was.
        self.assertEqual(inf_op2, req_op)
Пример #28
0
 def test_add_multiple_factors(self):
     """Two factors passed in one add_factors call must both be stored."""
     self.graph.add_nodes_from(["a", "b", "c"])
     added = [
         DiscreteFactor(scope, [2, 2], range(4))
         for scope in (["a", "b"], ["b", "c"])
     ]
     self.graph.add_factors(*added)
     # Order-insensitive comparison of the stored factors.
     six.assertCountEqual(self, self.graph.factors, added)
Пример #29
0
#####phi = [DiscreteFactor(edge, cardinality=[2, 2],
#####	   values=np.array([[1,2],
#####	   					[3,4]])) for edge in G.edges()]
#G.add_nodes_from(['z1', 'z2'])
#G.add_edges_from([('z1', 'z2')])
##phi = [DiscreteFactor(edge, cardinality=[2, 2], values=np.random.rand(4)) for edge in G.edges()]
#phi = [DiscreteFactor(edge, cardinality=[2, 2],
#	   values=np.array([[1,1],
#	   					[1,1]])) for edge in G.edges()]
#	   values=np.array([[1,1],
#	   					[1,1]])) for edge in G.edges()]

# Two pairwise factors that share x1, plus a unary factor on x1.
phi = [
    DiscreteFactor(['x2', 'x1'],
                   cardinality=[2, 2],
                   values=np.array([[1, 2], [3, 4]])),
    DiscreteFactor(['x3', 'x1'],
                   cardinality=[2, 2],
                   values=np.array([[1, 2], [3, 4]])),
    DiscreteFactor(['x1'], cardinality=[2], values=np.array([2, 2]))
]

G.add_factors(*phi)
# get_factors must be called; printing the attribute only shows the bound
# method. The explicit %-formatting gives the same text whether ``print`` is
# the Python 2 statement or the Python 3 function.
print("factors: %s" % (G.get_factors(),))
print("partition function = %s" % (G.get_partition_function(),))


def eval_partition_func_random_glass_spin(N):
    '''
	
Пример #30
0
    def test_copy(self):
        """Check that copy() gives a model whose nodes, edges and factors are independent of the original."""
        # Build a two-node original and clone it
        self.graph.add_nodes_from(["a", "b"])
        self.graph.add_edges_from([("a", "b")])
        clone = self.graph.copy()

        # The clone must be a valid model with the same structure
        self.assertTrue(clone.check_model())
        self.assertEqual(len(clone.nodes()), 2)
        self.assertListEqual(list(clone.neighbors("a")), ["b"])
        self.assertListEqual(list(clone.neighbors("b")), ["a"])

        # Growing the original afterwards ...
        self.graph.add_nodes_from(["c"])
        self.graph.add_edges_from([("c", "b")])

        # ... must leave the existing clone unchanged
        self.assertEqual(len(clone.nodes()), 2)
        self.assertListEqual(list(clone.neighbors("a")), ["b"])
        self.assertListEqual(list(clone.neighbors("b")), ["a"])
        self.assertRaises(nx.NetworkXError,
                          lambda: list(clone.neighbors("c")))

        # No factors have been added anywhere yet
        self.assertEqual(len(clone.get_factors()), 0)

        # A factor added to the original only
        phi1 = DiscreteFactor(["a", "b"], [2, 2], [[0.3, 0.7], [0.9, 0.1]])
        self.graph.add_factors(phi1)

        # The list comparison is expected to fail: the old clone has no factor
        with self.assertRaises(AssertionError):
            self.assertListEqual(list(clone.get_factors()),
                                 self.graph.get_factors())

        # A clone taken now does carry the factor
        clone = self.graph.copy()
        self.assertListEqual(list(clone.get_factors()),
                             self.graph.get_factors())

        # Mutating the original's factor must not reach the clone's factor
        phi1.values = np.array([[0.5, 0.5], [0.5, 0.5]])
        self.assertNotEqual(self.graph.get_factors(), clone.get_factors())

        # Re-clone after adding an isolated node to the original
        self.graph.add_nodes_from(["d"])
        clone = self.graph.copy()

        # The isolated node is counted in the clone; original structure is intact
        self.assertEqual(len(clone.nodes()), 4)
        self.assertListEqual(list(self.graph.neighbors("a")), ["b"])
        for node in ("a", "c"):
            self.assertIn(node, self.graph.neighbors("b"))
        self.assertListEqual(list(self.graph.neighbors("c")), ["b"])
        self.assertListEqual(list(self.graph.neighbors("d")), [])

        # A node added to the clone must be unknown to the original
        clone.add_nodes_from(["e"])
        self.assertListEqual(list(clone.neighbors("e")), [])
        self.assertRaises(nx.NetworkXError, self.graph.neighbors, "e")

        # An edge added to the clone must not appear in the original
        clone.add_edges_from([("d", "b")])
        for node in ("a", "c", "d"):
            self.assertIn(node, clone.neighbors("b"))

        for node in ("a", "c"):
            self.assertIn(node, self.graph.neighbors("b"))
        self.assertNotIn("d", self.graph.neighbors("b"))

        # Removing the factor from the clone must keep it in the original
        clone.remove_factors(phi1)
        self.assertEqual(len(self.graph.get_factors()), 1)
        self.assertEqual(len(clone.get_factors()), 0)
Пример #31
0
class TestFactorSet(unittest.TestCase):
    """Unit tests for FactorSet: construction, add/remove, product, divide, marginalize."""

    def setUp(self):
        """Create the four three-variable factors shared by the tests."""
        self.phi1 = DiscreteFactor(["x1", "x2", "x3"],
                                   cardinality=[2, 3, 2],
                                   values=range(12))
        self.phi2 = DiscreteFactor(["x3", "x4", "x1"],
                                   cardinality=[2, 2, 2],
                                   values=range(8))
        self.phi3 = DiscreteFactor(["x5", "x6", "x7"],
                                   cardinality=[2, 2, 2],
                                   values=range(8))
        self.phi4 = DiscreteFactor(["x5", "x7", "x8"],
                                   cardinality=[2, 2, 2],
                                   values=range(8))

    @staticmethod
    def _with_scope(factors, scope):
        """Return the first factor whose scope, as a set, equals ``scope``."""
        return [factor for factor in factors if set(factor.scope()) == scope][0]

    def _assert_marginalized(self, factors):
        """Compare ``factors`` with the fixtures marginalized one by one."""
        # (fixture, variable summed out, scope that remains)
        expectations = (
            (self.phi1, "x1", {"x2", "x3"}),
            (self.phi2, "x1", {"x4", "x3"}),
            (self.phi3, "x5", {"x6", "x7"}),
            (self.phi4, "x5", {"x8", "x7"}),
        )
        for fixture, dropped, remaining in expectations:
            found = self._with_scope(factors, remaining)
            self.assertEqual(fixture.marginalize([dropped], inplace=False),
                             found)

    def test_class_init(self):
        """A FactorSet built from two factors reports exactly those two."""
        members = [
            DiscreteFactor(["x1", "x2", "x3"], [2, 3, 2], range(12)),
            DiscreteFactor(["x3", "x4", "x1"], [2, 2, 2], range(8)),
        ]
        factor_set1 = FactorSet(*members)
        self.assertEqual(set(members), factor_set1.get_factors())

    def test_factorset_add_remove_factors(self):
        """Factors can be added to, then removed from, an empty FactorSet."""
        self.factor_set1 = FactorSet()
        self.factor_set1.add_factors(self.phi1, self.phi2)
        after_add = {self.phi1, self.phi2}
        self.assertEqual(after_add, self.factor_set1.get_factors())

        self.factor_set1.remove_factors(self.phi2)
        after_remove = {self.phi1}
        self.assertEqual(after_remove, self.factor_set1.get_factors())

    def test_factorset_product(self):
        """The product of two factor sets holds the factors of both."""
        first = FactorSet(self.phi1, self.phi2)
        second = FactorSet(self.phi3, self.phi4)
        combined = second.product(first, inplace=False)
        expected = {self.phi1, self.phi2, self.phi3, self.phi4}
        self.assertEqual(expected, combined.factors)

    def test_factorset_divide(self):
        """Dividing keeps the dividend's factors and adds inverted divisor factors."""
        phi1 = DiscreteFactor(["x1", "x2", "x3"], [2, 3, 2], range(1, 13))
        phi2 = DiscreteFactor(["x3", "x4", "x1"], [2, 2, 2], range(1, 9))
        divisor = FactorSet(phi1, phi2)
        phi3 = DiscreteFactor(["x5", "x6", "x7"], [2, 2, 2], range(1, 9))
        phi4 = DiscreteFactor(["x5", "x7", "x8"], [2, 2, 2], range(1, 9))
        dividend = FactorSet(phi3, phi4)
        quotient = dividend.divide(divisor, inplace=False)

        inverse_phi1 = phi1.identity_factor() / phi1
        inverse_phi2 = phi2.identity_factor() / phi2
        self.assertEqual({phi3, phi4, inverse_phi1, inverse_phi2},
                         quotient.factors)

    def test_factorset_marginalize_inplace(self):
        """In-place marginalization is checked on the factor set itself."""
        factor_set = FactorSet(self.phi1, self.phi2, self.phi3, self.phi4)
        factor_set.marginalize(["x1", "x5"], inplace=True)
        self._assert_marginalized(factor_set.factors)

    def test_factorset_marginalize_not_inplace(self):
        """With inplace=False the returned factor set is the one checked."""
        factor_set = FactorSet(self.phi1, self.phi2, self.phi3, self.phi4)
        new_factor_set = factor_set.marginalize(["x1", "x5"], inplace=False)
        self._assert_marginalized(new_factor_set.factors)
Пример #32
0
 def test_add_single_factor(self):
     """A single factor passed to add_factors must end up in graph.factors."""
     factor_edges = [('a', 'phi1'), ('b', 'phi1')]
     self.graph.add_edges_from(factor_edges)

     phi1 = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4))
     self.graph.add_factors(phi1)

     stored = self.graph.factors
     six.assertCountEqual(self, stored, [phi1])
Пример #33
0
 def setUp(self):
     """Create the four three-variable factors phi1..phi4 used as fixtures."""
     self.phi1 = DiscreteFactor(
         ["x1", "x2", "x3"], cardinality=[2, 3, 2], values=range(12))
     self.phi2 = DiscreteFactor(
         ["x3", "x4", "x1"], cardinality=[2, 2, 2], values=range(8))
     self.phi3 = DiscreteFactor(
         ["x5", "x6", "x7"], cardinality=[2, 2, 2], values=range(8))
     self.phi4 = DiscreteFactor(
         ["x5", "x7", "x8"], cardinality=[2, 2, 2], values=range(8))
class TestFactorSet(unittest.TestCase):
    """Tests for FactorSet: construction, add/remove, product, divide, marginalize."""

    def setUp(self):
        """Create the four fixture factors ``phi1`` .. ``phi4``."""
        # (attribute name, variables, cardinalities, values)
        fixture_specs = (
            ('phi1', ['x1', 'x2', 'x3'], [2, 3, 2], range(12)),
            ('phi2', ['x3', 'x4', 'x1'], [2, 2, 2], range(8)),
            ('phi3', ['x5', 'x6', 'x7'], [2, 2, 2], range(8)),
            ('phi4', ['x5', 'x7', 'x8'], [2, 2, 2], range(8)),
        )
        for attribute, scope, cardinality, values in fixture_specs:
            setattr(self, attribute,
                    DiscreteFactor(scope, cardinality, values))

    def _assert_marginalized(self, factor_set):
        """Compare each factor of ``factor_set`` with its fixture counterpart.

        ``factor_set`` is expected to be the result of marginalizing ``x1``
        and ``x5`` out of a set holding the four fixture factors.
        """
        # (fixture factor, variable summed out of it, scope that remains)
        expectations = (
            (self.phi1, 'x1', {'x2', 'x3'}),
            (self.phi2, 'x1', {'x4', 'x3'}),
            (self.phi3, 'x5', {'x6', 'x7'}),
            (self.phi4, 'x5', {'x8', 'x7'}),
        )
        for fixture, summed_out, remaining_scope in expectations:
            counterpart = [
                candidate for candidate in factor_set.factors
                if set(candidate.scope()) == remaining_scope
            ][0]
            self.assertEqual(
                fixture.marginalize([summed_out], inplace=False),
                counterpart)

    def test_class_init(self):
        """A FactorSet built from two factors reports exactly those two."""
        first = DiscreteFactor(['x1', 'x2', 'x3'], [2, 3, 2], range(12))
        second = DiscreteFactor(['x3', 'x4', 'x1'], [2, 2, 2], range(8))
        built = FactorSet(first, second)
        self.assertEqual({first, second}, built.get_factors())

    def test_factorset_add_remove_factors(self):
        """Factors can be added to an empty set and removed again."""
        factor_set = self.factor_set1 = FactorSet()
        factor_set.add_factors(self.phi1, self.phi2)
        self.assertEqual({self.phi1, self.phi2}, factor_set.get_factors())
        factor_set.remove_factors(self.phi2)
        self.assertEqual({self.phi1}, factor_set.get_factors())

    def test_factorset_product(self):
        """The product of two factor sets holds the factors of both."""
        left = FactorSet(self.phi1, self.phi2)
        right = FactorSet(self.phi3, self.phi4)
        combined = right.product(left, inplace=False)
        self.assertEqual({self.phi1, self.phi2, self.phi3, self.phi4},
                         combined.factors)

    def test_factorset_divide(self):
        """Dividing keeps the numerator factors and adds inverted divisors."""
        # Values start at 1 (not 0), unlike the setUp fixtures.
        phi1 = DiscreteFactor(['x1', 'x2', 'x3'], [2, 3, 2], range(1, 13))
        phi2 = DiscreteFactor(['x3', 'x4', 'x1'], [2, 2, 2], range(1, 9))
        divisor = FactorSet(phi1, phi2)
        phi3 = DiscreteFactor(['x5', 'x6', 'x7'], [2, 2, 2], range(1, 9))
        phi4 = DiscreteFactor(['x5', 'x7', 'x8'], [2, 2, 2], range(1, 9))
        numerator = FactorSet(phi3, phi4)
        quotient = numerator.divide(divisor, inplace=False)
        expected = {
            phi3,
            phi4,
            phi1.identity_factor() / phi1,
            phi2.identity_factor() / phi2,
        }
        self.assertEqual(expected, quotient.factors)

    def test_factorset_marginalize_inplace(self):
        """In-place marginalization updates the factor set itself."""
        factor_set = FactorSet(self.phi1, self.phi2, self.phi3, self.phi4)
        factor_set.marginalize(['x1', 'x5'], inplace=True)
        self._assert_marginalized(factor_set)

    def test_factorset_marginalize_not_inplace(self):
        """Out-of-place marginalization returns a new, marginalized set."""
        factor_set = FactorSet(self.phi1, self.phi2, self.phi3, self.phi4)
        new_factor_set = factor_set.marginalize(['x1', 'x5'], inplace=False)
        self._assert_marginalized(new_factor_set)
    def setUp(self):
        """Build a Bayesian model and a Markov model, each with a UAIWriter.

        Sets ``bayesmodel``/``bayeswriter`` and ``markovmodel``/``markovwriter``
        on the test instance, and disables diff truncation via ``maxDiff``.
        """
        self.maxDiff = None

        # ---- Bayesian model --------------------------------------------
        node_names = [
            "kid",
            "bowel-problem",
            "dog-out",
            "family-out",
            "hear-bark",
            "light-on",
        ]
        directed_edges = [
            ["family-out", "dog-out"],
            ["bowel-problem", "dog-out"],
            ["family-out", "light-on"],
            ["dog-out", "hear-bark"],
        ]
        cpd_tables = {
            "kid": np.array([[0.3], [0.7]]),
            "bowel-problem": np.array([[0.01], [0.99]]),
            "dog-out": np.array([[0.99, 0.01, 0.97, 0.03], [0.9, 0.1, 0.3, 0.7]]),
            "family-out": np.array([[0.15], [0.85]]),
            "hear-bark": np.array([[0.7, 0.3], [0.01, 0.99]]),
            "light-on": np.array([[0.6, 0.4], [0.05, 0.95]]),
        }
        # Every node has the same two states.
        node_states = {name: ["true", "false"] for name in node_names}
        node_parents = {
            "kid": [],
            "bowel-problem": [],
            "dog-out": ["bowel-problem", "family-out"],
            "family-out": [],
            "hear-bark": ["dog-out"],
            "light-on": ["family-out"],
        }

        self.bayesmodel = BayesianModel()
        self.bayesmodel.add_nodes_from(node_names)
        self.bayesmodel.add_edges_from(directed_edges)

        # One TabularCPD per entry of cpd_tables, in insertion order.
        tabular_cpds = [
            TabularCPD(
                name,
                len(node_states[name]),
                table,
                evidence=node_parents[name],
                evidence_card=[
                    len(node_states[parent]) for parent in node_parents[name]
                ],
            )
            for name, table in cpd_tables.items()
        ]
        self.bayesmodel.add_cpds(*tabular_cpds)
        self.bayeswriter = UAIWriter(self.bayesmodel)

        # ---- Markov model ----------------------------------------------
        undirected_edges = {
            ("var_0", "var_1"),
            ("var_0", "var_2"),
            ("var_1", "var_2"),
        }
        self.markovmodel = MarkovModel(undirected_edges)

        # Cardinalities and potentials are kept as strings and parsed below.
        domain = {"var_1": "2", "var_2": "3", "var_0": "2"}
        factor_tables = [
            (["var_0", "var_1"], ["4.000", "2.400", "1.000", "0.000"]),
            (
                ["var_0", "var_1", "var_2"],
                [
                    "2.2500",
                    "3.2500",
                    "3.7500",
                    "0.0000",
                    "0.0000",
                    "10.0000",
                    "1.8750",
                    "4.0000",
                    "3.3330",
                    "2.0000",
                    "2.0000",
                    "3.4000",
                ],
            ),
        ]
        markov_factors = [
            DiscreteFactor(
                scope,
                [int(domain[name]) for name in scope],
                [float(entry) for entry in raw_values],
            )
            for scope, raw_values in factor_tables
        ]
        self.markovmodel.add_factors(*markov_factors)
        self.markovwriter = UAIWriter(self.markovmodel)
Пример #36
0
    def test_factor_reduce_statename(self):
        """Reduce a factor built with ``state_names`` and check what is left.

        ``speed`` and ``time`` are fixed, once with string state labels and
        once with integer states, each both in place and with
        ``inplace=False``. In every case only ``switch`` (cardinality 2, all
        ones) must remain.
        """
        def fresh_factor():
            # A new all-ones factor for every scenario, since reduce may mutate.
            return DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                                  np.ones(12),
                                  state_names=self.sn1)

        def assert_only_switch_left(reduced):
            self.assertEqual(reduced.variables, ['switch'])
            self.assertEqual(reduced.cardinality, [2])
            np_test.assert_array_equal(reduced.values, np.array([1, 1]))

        assignments = (
            (('speed', 'medium'), ('time', 'day')),
            (('speed', 1), ('time', 0)),
        )
        for assignment in assignments:
            # In-place reduction (default behaviour of reduce).
            mutated = fresh_factor()
            mutated.reduce(list(assignment))
            assert_only_switch_left(mutated)

            # Reduction returning a new factor.
            returned = fresh_factor().reduce(list(assignment), inplace=False)
            assert_only_switch_left(returned)
Пример #37
0
    def fit(self,
            L_train,
            class_balance=None,
            Y_dev=None,
            flip_negative=True,
            clamp=True,
            solve_method='triplet_mean',
            sign_recovery='all_positive',
            verbose=False):
        r'''Compute the marginal probabilities of each clique and separator set in the junction tree.

        Nothing is returned; results are stored on the instance:
        ``clique_marginals``, ``separator_marginals`` and ``separator_degrees``,
        plus the intermediate tables ``triplets``, ``cb``, ``Y_marginals``,
        ``Y_equals_one``, ``lambda_marginals``, ``lambda_moment_vals``,
        ``lambda_equals_one``, ``lambda_zeros``, ``abstention_probabilities``,
        ``probability_values``, ``expectation_values`` and ``r_vals``.

        (This docstring is a raw string because it contains LaTeX backslashes.)

        L_train: an m x n matrix of LF outputs. L_train[k][i] is the value of \lambda_i on item k.
            1 means positive, -1 means negative, 0 means abstain.
        class_balance: a 2^v vector of the probabilities of each combination of Y values. Sorted in
          lexicographical order (entry zero is for Y_0 = -1, ..., Y_{v-1} = -1, entry one is for
          Y_0 = -1, ..., Y_{v-1} = 1, last entry is for Y_0 = 1, ..., Y_{v-1} = 1).
        Y_dev: a v x |Y_dev| matrix of ground truth examples. If class_balance is not specified, this
          is used to find out the class balance. Otherwise not used.
          If this is not specified, and class_balance is not specified, then class balance is uniform.
          1 means positive, -1 means negative.
        flip_negative: if True, flip the sign of negative probabilities and renormalize
        clamp: if True and flip_negative is not True, replace negative probabilities with a tiny
          positive value (1e-8) and renormalize
        solve_method: one of ['triplet_mean', 'triplet_median', 'triplet', 'independencies']
          If triplet, use the method below and the independencies we write down there.
          If independencies, use the following facts:
            * For any lambda_i: lambda_i * Y and Y are independent for any i, so
              E[lambda_i Y] = E[lambda_i] / E[Y]
            * For any lambda_i, lambda_j: E[lambda_i * lambda_j * Y] = E[lambda_i * lambda_j] * E[Y]
            * For an odd number of lambda's, the first property holds; for an even number, the second
              property holds
          Only triplet implemented right now ('independencies' prints a message and returns).
        sign_recovery: one of ['all_positive', 'fully_independent']
          If all_positive, assume that all accuracies that we compute are positive.
          If fully_independent, assume that the accuracy of lambda_0 on Y_0 is positive, and that for
            any lambda_i and lambda_{i+1}, sign(lambda_i lambda_{i+1}) = sign(M_{i,i+1}) where M_{i, i+1}
            is the second moment between lambda_0 and lambda_i.
          If solve_method is independencies, we don't need to do this.
          Only all_positive implemented right now.
        verbose: if True, print out messages to stderr as we make progress

        Raises:
          ValueError: if solve_method is neither 'independencies' nor starts with 'triplet'.

        Note: if abstentions are not allowed and L_train contains a zero, a message is printed
        and the method returns without fitting.

        How we go about solving these probabilities (for Triplet method):
          * We assume that we have the joint distribution/class balance of our Y's (or can infer it
            from the dev set).
          * We observe agreements and disagreements between LF's, so we can compute values like
            P(\lambda_i \lambda_j = 1).
          * The only thing we need to estimate now are correlations between LF's and (unseen) Y's -
            values like P(\lambda_i Y_j = 1).
          * Luckily, we have P(\lambda_i Y_j = 1) = 1/2(1 + E[\lambda_i Y_j]). We refer to E[\lambda_i Y_j]
            as the accuracy of \lambda_i on Y_j.
          * And because of the format of our exponential model, we have:
              E[\lambda_i Y_j]E[\lambda_k Y_j] = E[\lambda_i Y_j \lambda_k Y_j] = E[\lambda_i \lambda_k]
            For any \lambda_i, \lambda_k that are conditionally independent given Y_j. This translates to
              Y_j being a separator of \lambda_i and \lambda_k in our graphical model.
            And we can observe E[\lambda_i \lambda_k] (the second moment) from L_train!
          * The algorithm proceeds to estimate the marginal probabilities by picking out triplets of
            conditionally-independent subsets of LF's, and estimating the accuracies of LF's on Y's.
          * Then, to recover the joint probabilities, we can solve a linear system B e = r (written out in latex):

              $$\begin{align*}
                \begin{bmatrix}
                1 & 1 & 1 & 1 \\
                1 & 0 & 1 & 0 \\
                1 & 1 & 0 & 0 \\
                1 & 0 & 0 &1
                \end{bmatrix}
                \begin{bmatrix}
                p_{\lambda_i, Y_j}(+1, +1)\\
                p_{\lambda_i, Y_j}(-1, +1)  \\
                p_{\lambda_i, Y_j}(+1, -1) \\
                p_{\lambda_i, Y_j}(-1, -1) \end{bmatrix} =
                \begin{bmatrix} 1 \\
                P(\lambda_{i} = 1) \\
                P(Y_j = 1)  \\
                \rho_{i, j} \end{bmatrix} .
                \end{align*}$$

              The values on the left of the equality are an invertible matrix, and values like
              P(\lambda_i = 1, Y_j = 1), P(\lambda_i = -1, Y_j = 1), etc for the full marginal probability.
              The values on the right of the equality are [1, P(\lambda_i = 1), P(Y_j = 1), P(\lambda_i = Y_j)]^T.
              We can observe or solve for all the values on the right, to solve for the values in the marginal
              probability!
              This can also be extended to multiple dimensions.

            Outputs: None.
        '''
        # if abstentions not allowed, check for zero's
        if not self.allow_abstentions:
            if np.count_nonzero(L_train) < L_train.shape[0] * L_train.shape[1]:
                print('Abstentions not allowed!')
                return

        def num_Ys(nodes):
            # (1,) and (0,) are placeholder cliques that hold no variables
            if nodes == (1,) or nodes == (0,):
                return 0
            return sum(1 for node in nodes if 'Y' in node)

        def num_lambdas(nodes):
            if nodes == (1,) or nodes == (0,):
                return 0
            return sum(1 for node in nodes if 'lambda' in node)

        def node_indices(nodes):
            # 'lambda_3' / 'Y_0' -> 3 / 0; sorted tuple used as a dict key
            return tuple(sorted(int(node.split('_')[1]) for node in nodes))

        # Y marginals to compute
        Y_marginals = {}

        # lambda marginals to compute
        lambda_marginals = {}

        # marginals will eventually be returned here: cliques first, then separator sets
        marginals = [(clique, None)
                     for clique in sorted(self.junction_tree.nodes) +
                     sorted(self.separator_sets)]

        # a clique is observable when it holds only Y's or only lambda's
        observable_cliques = []
        non_observable_cliques = []
        for i, (clique, _) in enumerate(marginals):
            if num_Ys(clique) == 0 or num_lambdas(clique) == 0:
                observable_cliques.append(i)
            else:
                non_observable_cliques.append(i)

        # write down everything we need for the observable cliques
        for idx in observable_cliques:
            clique = marginals[idx][0]
            indices = node_indices(clique)

            if 'Y' in clique[0]:
                Y_marginals.setdefault(indices, None)
            else:
                lambda_marginals.setdefault(indices, None)

        if verbose:
            print('Marginals written down', file=sys.stderr)

        # for each marginal we need to estimate, write down the r vector that we need
        r_vecs = {}  # mapping from clique index to the r vector
        # mapping from a value name (like Y_1 or tuple(lambda_1, Y_1)) to its value
        r_vals = {}
        for idx in non_observable_cliques:
            clique = list(reversed(sorted(marginals[idx][0])))
            r_vec = self._generate_r_vector(clique)
            r_vecs[idx] = r_vec
            for r_val in r_vec:
                r_vals.setdefault(r_val, None)

        if verbose:
            print('R vector written down', file=sys.stderr)

        # write down all the sets of zero conditions
        lambda_zeros = {}

        # write down the moment values that we need to keep track of when we walk through the L matrix
        Y_equals_one = {}
        lambda_equals_one = {}

        # write down which expectations we need to solve using the triplet method
        expectations_to_estimate = set()
        for r_val in r_vals:
            if not self.allow_abstentions or r_val[1] == ('0',):
                equals_one_tup = (r_val
                                  if not self.allow_abstentions else r_val[0])

                if equals_one_tup[0] == '1':
                    # If the value is 1, the probability is just 1
                    r_vals[r_val] = 1
                elif (num_Ys(equals_one_tup) != 0
                      and num_lambdas(equals_one_tup) != 0):
                    # If this contains lambdas and Y's, we can't observe it
                    expectations_to_estimate.add(r_val)
                elif num_Ys(equals_one_tup) != 0:
                    # We need to cache this moment
                    Y_equals_one.setdefault(node_indices(equals_one_tup), None)
                elif num_lambdas(equals_one_tup) != 0:
                    # If it contains just lambdas, go through L_train
                    lambda_equals_one.setdefault(
                        node_indices(equals_one_tup), None)
            else:
                # we allow abstentions, and there are clauses that are equal to zero
                equals_one_tup = r_val[0]
                equals_zero_tup = r_val[1]
                if (num_lambdas(equals_one_tup) > 0
                        and num_Ys(equals_one_tup) > 0):
                    # we can't observe this
                    expectations_to_estimate.add(r_val)
                elif num_lambdas(equals_one_tup) > 0:
                    # compute probability some lambda's multiply to one, subject to some zeros
                    pos_indices = node_indices(equals_one_tup)
                    zero_indices = node_indices(equals_zero_tup)

                    lambda_equals_one.setdefault((pos_indices, zero_indices),
                                                 None)
                    lambda_zeros.setdefault(zero_indices, None)
                else:
                    # compute a Y equals one probability, and multiply it by probability of zeros
                    if equals_one_tup[0] != '1':
                        Y_equals_one.setdefault(node_indices(equals_one_tup),
                                                None)
                    lambda_zeros.setdefault(node_indices(equals_zero_tup),
                                            None)

        if verbose:
            print('Expectations to estimate written down', file=sys.stderr)

        if solve_method.startswith('triplet'):
            triplets, new_moment_vals, abstention_probabilities = (
                self._triplet_method_preprocess(expectations_to_estimate,
                                                solve_method))
            self.triplets = triplets
        elif solve_method == 'independencies':
            print('Independencies not implemented yet!')
            return
        else:
            # Previously this fell through and crashed later with a NameError
            raise ValueError(
                'Unknown solve_method {!r}; expected a triplet method or '
                "'independencies'".format(solve_method))

        if verbose:
            print('Triplets constructed', file=sys.stderr)

        # every moment the triplet method needs, value still to be computed
        lambda_moment_vals = dict.fromkeys(new_moment_vals)

        # now time to compute all the Y marginals
        self.cb = self._compute_class_balance(class_balance, Y_dev)
        Y_marginals = self._compute_Y_marginals(Y_marginals)

        if verbose:
            print('Y marginals computed', file=sys.stderr)

        Y_equals_one = self._compute_Y_equals_one(Y_equals_one)

        if verbose:
            print('Y equals one computed', file=sys.stderr)

        self.Y_marginals = Y_marginals
        self.Y_equals_one = Y_equals_one

        # now time to compute the lambda moments, marginals, zero conditions, and abstention probs
        (lambda_marginals, lambda_moment_vals, lambda_equals_one, lambda_zeros,
         abstention_probabilities) = self._lambda_pass(
             L_train,
             lambda_marginals,
             lambda_moment_vals,
             lambda_equals_one,
             lambda_zeros,
             abstention_probabilities,
             verbose=verbose)

        if verbose:
            print('lambda marginals, moments, conditions computed',
                  file=sys.stderr)

        self.lambda_marginals = lambda_marginals
        self.lambda_moment_vals = lambda_moment_vals
        self.lambda_equals_one = lambda_equals_one
        self.lambda_zeros = lambda_zeros
        self.abstention_probabilities = abstention_probabilities

        # put observable cliques in the right place
        for idx in observable_cliques:
            clique = marginals[idx][0]
            indices = node_indices(clique)

            if 'Y' in clique[0]:
                marginal = Y_marginals[indices]
            else:
                marginal = lambda_marginals[indices]

            marginals[idx] = (clique, marginal)

        # get unobserved probabilities; only a triplet method can reach this
        # point, every other solve_method returned or raised above
        probability_values, expectation_values = self._triplet_method_probabilities(
            triplets, lambda_moment_vals, lambda_zeros,
            abstention_probabilities, sign_recovery, solve_method)

        self.probability_values = probability_values
        self.expectation_values = expectation_values

        if verbose:
            print('Unobserved probabilities computed', file=sys.stderr)

        # put values into the R vectors
        for r_val in r_vals:
            if not self.allow_abstentions or r_val[1] == ('0',):
                equals_one_tup = (r_val
                                  if not self.allow_abstentions else r_val[0])

                if equals_one_tup[0] == '1':
                    # If the value is 1, the probability is just 1 (already stored)
                    pass
                elif (num_Ys(equals_one_tup) != 0
                      and num_lambdas(equals_one_tup) != 0):
                    # If this contains lambdas and Y's, we can't observe it
                    r_vals[r_val] = probability_values[r_val]
                elif num_Ys(equals_one_tup) != 0:
                    # We cached this moment
                    r_vals[r_val] = Y_equals_one[node_indices(equals_one_tup)]
                elif num_lambdas(equals_one_tup) != 0:
                    r_vals[r_val] = lambda_equals_one[node_indices(
                        equals_one_tup)]
            else:
                # we allow abstentions, and there are clauses that are equal to zero
                equals_one_tup = r_val[0]
                equals_zero_tup = r_val[1]
                if (num_lambdas(equals_one_tup) > 0
                        and num_Ys(equals_one_tup) > 0):
                    # we can't observe this
                    r_vals[r_val] = probability_values[r_val]
                elif num_lambdas(equals_one_tup) > 0:
                    # compute lambda moment, subject to some zeros
                    pos_indices = node_indices(equals_one_tup)
                    zero_indices = node_indices(equals_zero_tup)

                    r_vals[r_val] = lambda_equals_one[(pos_indices,
                                                       zero_indices)]
                else:
                    # compute a Y moment, and multiply it by probability of zeros
                    if equals_one_tup[0] != '1':
                        pos_prob = Y_equals_one[node_indices(equals_one_tup)]
                    else:
                        pos_prob = 1.
                    zero_probs = lambda_zeros[node_indices(equals_zero_tup)]

                    r_vals[r_val] = pos_prob * zero_probs

        self.r_vals = r_vals

        if verbose:
            print('R values computed', file=sys.stderr)

        # cardinality 3 for lambda variables if you allow abstentions, 2 for Y's
        lambda_cardinality = 3 if self.allow_abstentions else 2

        # solve for marginal values
        for idx in non_observable_cliques:
            # reverse-sorted names put every 'lambda_*' before the 'Y_*' node
            # (lowercase 'l' sorts after uppercase 'Y')
            clique = list(reversed(sorted(marginals[idx][0])))

            r_vec_vals = np.array([r_vals[exp] for exp in r_vecs[idx]])

            # e_vec is the vector of marginal values
            e_vec = self._generate_e_vector(clique)

            b_matrix = self._generate_b_matrix(clique)

            # solve B e = r
            e_vec_vals = np.linalg.inv(b_matrix) @ r_vec_vals

            # reorder the solution so entries follow sorted(e_vec)
            e_vec_val_index = {tup: i for i, tup in enumerate(e_vec)}
            marginal_vals = np.array(
                [e_vec_vals[e_vec_val_index[tup]] for tup in sorted(e_vec)])

            if flip_negative:
                negative = marginal_vals < 0
                marginal_vals[negative] = marginal_vals[negative] * -1
                marginal_vals /= sum(marginal_vals)
            elif clamp:
                marginal_vals[marginal_vals < 0] = 1e-8
                marginal_vals /= sum(marginal_vals)

            indices = [int(node.split('_')[1]) for node in clique]
            lf_indices = sorted(indices[:-1])
            Y_idx = indices[-1]

            variables = ['lambda_{}'.format(i)
                         for i in lf_indices] + ['Y_{}'.format(Y_idx)]

            cardinalities = [lambda_cardinality] * len(lf_indices) + [2]

            marginal = DiscreteFactor(variables, cardinalities,
                                      marginal_vals).normalize(inplace=False)

            marginals[idx] = (clique, marginal)

        # marginals was built as cliques followed by separator sets
        num_cliques = len(self.junction_tree.nodes)
        self.clique_marginals = marginals[:num_cliques]
        self.separator_marginals = marginals[num_cliques:]

        # count how many junction-tree edges share each separator set
        separator_degrees = {sep: 0 for sep in self.separator_sets}
        for clique1, clique2 in self.junction_tree.edges:
            shared = tuple(sorted(set(clique1) & set(clique2)))
            separator_degrees[shared] += 1
        self.separator_degrees = separator_degrees
Пример #38
0
    def backward_inference(self, variables, evidence=None):
        """
        Backward inference method using belief propagation.

        Runs ``forward_inference`` to obtain the per-time-slice potentials,
        then walks from the last time slice back to slice 0, answering the
        queries of each slice on the way.

        Parameters:
        ----------
        variables: list
            list of variables for which you want to compute the probability;
            each variable is a ``(name, time_slice)`` pair
        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence

        Returns:
        -------
        dict
            maps each queried variable to the factor computed for it

        Examples:
        --------
        >>> from pgmpy.factors.discrete import TabularCPD
        >>> from pgmpy.models import DynamicBayesianNetwork as DBN
        >>> from pgmpy.inference import DBNInference
        >>> dbnet = DBN()
        >>> dbnet.add_edges_from([(('Z', 0), ('X', 0)), (('X', 0), ('Y', 0)),
        ...                       (('Z', 0), ('Z', 1))])
        >>> z_start_cpd = TabularCPD(('Z', 0), 2, [[0.5, 0.5]])
        >>> x_i_cpd = TabularCPD(('X', 0), 2, [[0.6, 0.9],
        ...                                    [0.4, 0.1]],
        ...                      evidence=[('Z', 0)],
        ...                      evidence_card=[2])
        >>> y_i_cpd = TabularCPD(('Y', 0), 2, [[0.2, 0.3],
        ...                                    [0.8, 0.7]],
        ...                      evidence=[('X', 0)],
        ...                      evidence_card=[2])
        >>> z_trans_cpd = TabularCPD(('Z', 1), 2, [[0.4, 0.7],
        ...                                        [0.6, 0.3]],
        ...                      evidence=[('Z', 0)],
        ...                      evidence_card=[2])
        >>> dbnet.add_cpds(z_start_cpd, z_trans_cpd, x_i_cpd, y_i_cpd)
        >>> dbnet.initialize_initial_state()
        >>> dbn_inf = DBNInference(dbnet)
        >>> dbn_inf.backward_inference([('X', 0)], {('Y', 0):0, ('Y', 1):1, ('Y', 2):1})[('X', 0)].values
        array([ 0.66594382,  0.33405618])
        """
        # Group the queried variables by their time slice.
        queries_by_slice = defaultdict(list)
        for node in variables:
            queries_by_slice[node[1]].append(node)

        # The walk starts at the latest slice seen in queries or evidence.
        last_slice = max(queries_by_slice)
        interface_evidence = {}
        if evidence:
            last_evidence_slice = max(
                [slice_index for _, slice_index in evidence.keys()])
            last_slice = max(last_slice, last_evidence_slice)

        start_bp = BeliefPropagation(self.start_junction_tree)
        potentials = self.forward_inference(variables, evidence, 'potential')
        backward_message = self._shift_factor(potentials[last_slice], 1)
        results = {}

        for current in range(last_slice, 0, -1):
            current_evidence = self._get_evidence(evidence, current, 1)
            previous_evidence = self._get_evidence(evidence, current - 1, 0)
            if previous_evidence:
                # Keep only evidence on interface nodes of the previous slice;
                # when there is none, the value from an earlier pass is reused.
                interface_evidence = {
                    node: state
                    for node, state in previous_evidence.items()
                    if node in self.interface_nodes_0
                }
            if current_evidence:
                current_evidence.update(interface_evidence)

            slice_bp = BeliefPropagation(self.one_and_half_junction_tree)
            self._update_belief(slice_bp, self.in_clique,
                                potentials[current - 1])
            self._update_belief(slice_bp, self.out_clique,
                                self._shift_factor(potentials[current], 1),
                                backward_message)

            if queries_by_slice[current]:
                shifted_queries = self._shift_nodes(queries_by_slice[current],
                                                    1)
                answers = slice_bp.query(shifted_queries,
                                         evidence=current_evidence)
                # Re-label each answer with the real time slice.
                for node, phi in answers.items():
                    relabelled = (node[0], current)
                    results[relabelled] = DiscreteFactor([relabelled],
                                                         phi.cardinality,
                                                         phi.values)

            # Message handed to the next (earlier) slice.
            clique_phi = self._get_factor(slice_bp, current_evidence)
            backward_message = self._shift_factor(
                self._marginalize_factor(self.interface_nodes_0, clique_phi),
                1)

        # Slice 0 is handled with the start junction tree.
        self._update_belief(start_bp, self.start_interface_clique,
                            potentials[0],
                            self._shift_factor(backward_message, 0))
        initial_evidence = self._get_evidence(evidence, 0, 0)
        if queries_by_slice[0]:
            results.update(start_bp.query(queries_by_slice[0],
                                          initial_evidence))
        return results
Пример #39
0
 def test_class_init(self):
     # Two factors over overlapping variable scopes (x1 and x3 are shared).
     phi1 = DiscreteFactor(["x1", "x2", "x3"], [2, 3, 2], range(12))
     phi2 = DiscreteFactor(["x3", "x4", "x1"], [2, 2, 2], range(8))
     factor_set1 = FactorSet(phi1, phi2)

     # The set handed back by get_factors() must hold exactly the factors
     # that were passed to the constructor.
     expected_factors = {phi1, phi2}
     self.assertEqual(expected_factors, factor_set1.get_factors())