Пример #1
0
    def get_factors(self, node=None):
        """
        Return the factors stored on the graph, or the factor of one node.

        Parameters
        ----------
        node: any hashable python object (optional)
            A factor node of the graph. When omitted, every factor added so
            far is returned.

        Returns
        -------
        The ``self.factors`` container when ``node`` is None; otherwise the
        first factor whose scope equals the set of neighbors of ``node``.

        Raises
        ------
        ValueError
            If ``node`` is not one of the graph's factor nodes.
        IndexError
            If no stored factor has a scope equal to the neighbors of
            ``node``.

        Examples
        --------
        >>> import numpy as np
        >>> from pgm.models import FactorGraph
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> G = FactorGraph()
        >>> G.add_nodes_from(['a', 'b', 'c'])
        >>> phi1 = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4))
        >>> phi2 = DiscreteFactor(['b', 'c'], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2)
        >>> G.add_nodes_from([phi1, phi2])
        >>> G.add_edges_from([('a', phi1), ('b', phi1),
        ...                   ('b', phi2), ('c', phi2)])
        >>> G.get_factors()
        >>> G.get_factors(node=phi1)
        """
        if node is None:
            return self.factors

        if node not in self.get_factor_nodes():
            raise ValueError('Factors are not associated with the '
                             'corresponding node.')

        # The factor attached to a factor node is the one whose scope is
        # exactly the set of variables adjacent to that node.
        adjacent = set(self.neighbors(node))
        matching = [
            candidate for candidate in self.factors
            if set(candidate.scope()) == adjacent
        ]
        return matching[0]
Пример #2
0
    def get_factors(self, node=None):
        """
        Return the factors stored on the graph, or the factor of one cluster.

        Parameters
        ----------
        node: iterable of variables (optional)
            A cluster node of the graph. Variable order is ignored when
            matching. When omitted, every factor added so far is returned.

        Returns
        -------
        The ``self.factors`` container when ``node`` is None; otherwise the
        first factor whose scope contains exactly the variables of ``node``.

        Raises
        ------
        ValueError
            If no node of the graph has the same variables as ``node``.
        StopIteration
            If the node exists but no stored factor matches its variables.

        Examples
        --------
        >>> import numpy as np
        >>> from pgm.models import ClusterGraph
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([('a', 'b', 'c'), ('a', 'b'), ('a', 'c')])
        >>> G.add_edges_from([(('a', 'b', 'c'), ('a', 'b')),
        ...                   (('a', 'b', 'c'), ('a', 'c'))])
        >>> phi1 = DiscreteFactor(['a', 'b', 'c'], [2, 2, 2], np.random.rand(8))
        >>> phi2 = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4))
        >>> phi3 = DiscreteFactor(['a', 'c'], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2, phi3)
        >>> G.get_factors()
        >>> G.get_factors(node=('a', 'b', 'c'))
        """
        if node is None:
            return self.factors

        # Clusters are compared as sets so that ('a', 'b') and ('b', 'a')
        # refer to the same node.
        wanted = set(node)
        known_clusters = [set(cluster) for cluster in self.nodes()]
        if wanted not in known_clusters:
            raise ValueError('Node not present in Cluster Graph')

        return next(
            candidate for candidate in self.factors
            if set(candidate.scope()) == wanted
        )
Пример #3
0
    def check_model(self):
        """
        Check the model for various errors.

        The following checks are performed:

        * Every clique (node) has at least one factor whose scope is exactly
          the variables of that clique.
        * The cardinality of each random variable is the same in every
          factor that mentions it.

        The running intersection property is not checked here; it is
        expected to be enforced by the add_edges method.

        Returns
        -------
        check: boolean
            True if all the checks are passed

        Raises
        ------
        ValueError
            If some clique has no matching factor, or if a factor disagrees
            with the model about the cardinality of a variable.
        """
        # Compute every factor scope once instead of once per clique.
        factor_scopes = [set(factor.scope()) for factor in self.factors]

        for clique in self.nodes():
            clique_variables = set(clique)
            # Test for the *existence* of a matching scope. Relying on the
            # truthiness of the matched factor objects would wrongly report
            # a missing factor whenever a factor evaluates as falsy.
            if not any(scope == clique_variables for scope in factor_scopes):
                raise ValueError(
                    'Factors for all the cliques or clusters not defined.')

        cardinalities = self.get_cardinality()
        for factor in self.factors:
            for variable, cardinality in zip(factor.scope(),
                                             factor.cardinality):
                if cardinalities[variable] != cardinality:
                    raise ValueError(
                        'Cardinality of variable {var} not matching among factors'
                        .format(var=variable))
        return True
Пример #4
0
    def induced_graph(self, elimination_order):
        """
        Build the graph induced by eliminating variables in the given order.

        Parameters
        ----------
        elimination_order: list, array like
            Variables in the order in which they are to be eliminated. It
            must contain exactly the variables of the model.

        Returns
        -------
        networkx.Graph
            Graph with an edge between every pair of variables that share a
            clique, either in an original factor scope or in a scope created
            while eliminating.

        Raises
        ------
        ValueError
            If ``elimination_order`` and ``self.variables`` differ as sets.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgm.models import BayesianModel
        >>> from pgm.inference import VariableElimination
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> model.fit(values)
        >>> inference = VariableElimination(model)
        >>> inference.induced_graph(['C', 'D', 'A', 'B', 'E'])
        <networkx.classes.graph.Graph at 0x7f34ac8c5160>
        """
        if set(elimination_order) != set(self.variables):
            raise ValueError("Set of variables in elimination order"
                             " different from variables in model")

        # Work on the scopes only; the factor values are irrelevant here.
        scopes_by_variable = {}
        for node in self.factors:
            scopes_by_variable[node] = [
                factor.scope() for factor in self.factors[node]
            ]

        # Every original factor scope is a clique of the induced graph.
        induced_cliques = {
            tuple(scope)
            for scopes in scopes_by_variable.values()
            for scope in scopes
        }

        already_eliminated = set()
        for var in elimination_order:
            # Scopes touching an eliminated variable were already consumed
            # in an earlier step and must not be counted again.
            live_scopes = [
                scope for scope in scopes_by_variable[var]
                if not set(scope).intersection(already_eliminated)
            ]
            merged_scope = set(
                itertools.chain.from_iterable(live_scopes)).difference({var})
            induced_cliques.add(tuple(merged_scope))
            del scopes_by_variable[var]
            # The scope produced by this elimination becomes a new factor
            # scope for each variable that remains in it.
            for remaining in merged_scope:
                scopes_by_variable[remaining].append(list(merged_scope))
            already_eliminated.add(var)

        # A clique contributes one edge per unordered pair of its members.
        pairwise_edges = [
            itertools.combinations(clique, 2)
            for clique in induced_cliques
            if len(clique) > 1
        ]
        return nx.Graph(itertools.chain.from_iterable(pairwise_edges))
Пример #5
0
 def test_factorset_marginalize_not_inplace(self):
     """Non-inplace marginalization returns a set of marginalized factors.

     Each fixture factor, marginalized on its own, must equal the factor in
     the returned set that has the corresponding remaining scope.
     """
     factor_set = FactorSet(self.phi1, self.phi2, self.phi3, self.phi4)
     new_factor_set = factor_set.marginalize(['x1', 'x5'], inplace=False)

     # (fixture factor, variable summed out of it, scope left afterwards)
     expectations = (
         (self.phi1, 'x1', {'x2', 'x3'}),
         (self.phi2, 'x1', {'x4', 'x3'}),
         (self.phi3, 'x5', {'x6', 'x7'}),
         (self.phi4, 'x5', {'x8', 'x7'}),
     )
     for source_factor, summed_out, remaining_scope in expectations:
         candidates = [
             factor for factor in new_factor_set.factors
             if set(factor.scope()) == remaining_scope
         ]
         equivalent_in_factor_set = candidates[0]
         self.assertEqual(
             source_factor.marginalize([summed_out], inplace=False),
             equivalent_in_factor_set)
Пример #6
0
    def marginalize(self, variables, inplace=True):
        """
        Marginalize every factor of the factor set over the given variables.

        Only factors whose scope contains at least one of ``variables`` are
        touched, and each is marginalized over just the variables it
        actually contains.

        Parameters
        ----------
        variables: list, array-like
            List of the variables to be marginalized. A bare string is
            rejected; wrap a single variable in a list.

        inplace: boolean (Default value True)
            If inplace=True the factor set itself is modified; otherwise a
            new factor set is created and returned.

        Returns
        -------
        FactorSet or None
            If inplace = False, a new marginalized FactorSet object;
            otherwise None.

        Raises
        ------
        TypeError
            If ``variables`` is a string.

        Examples
        --------
        >>> from pgm.factors import FactorSet
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> phi1 = DiscreteFactor(['x1', 'x2', 'x3'], [2, 3, 2], range(12))
        >>> phi2 = DiscreteFactor(['x3', 'x4', 'x1'], [2, 2, 2], range(8))
        >>> factor_set1 = FactorSet(phi1, phi2)
        >>> factor_set1.marginalize(['x1'])
        >>> print(factor_set1)
        set([<DiscreteFactor representing phi(x2:3, x3:2) at 0x7f8e32b4cc10>,
             <DiscreteFactor representing phi(x3:2, x4:2) at 0x7f8e32b4cf90>])
        """
        if isinstance(variables, six.string_types):
            raise TypeError('Expected list or array-like type got type str')

        target_set = self if inplace else self.copy()

        # Snapshot the affected factors first: the loop below may add and
        # remove members of target_set.factors.
        affected = {
            factor for factor in target_set.factors
            if set(factor.scope()).intersection(variables)
        }

        for factor in affected:
            shared = list(set(factor.scope()).intersection(variables))
            if inplace:
                factor.marginalize(shared, inplace=True)
                continue
            # Not in place: swap the factor for its marginalized copy.
            target_set.remove_factors(factor)
            target_set.add_factors(factor.marginalize(shared, inplace=False))

        return None if inplace else target_set
Пример #7
0
    def find_triangles(self):
        """
        Return the three-node cliques of the model.

        The result is the list of cliques of exactly three nodes among those
        enumerated by ``nx.find_cliques(self.model)``. ``find_cliques``
        enumerates maximal cliques, so a triangle contained in a larger
        clique is not reported separately.

        Returns
        -------
        list
            The three-node cliques, in the order ``nx.find_cliques`` yields
            them.

        Examples
        --------
        >>> import numpy as np
        >>> from pgm.models import MarkovModel
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> from pgm.inference import Mplp
        >>> mm = MarkovModel()
        >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                    ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
        >>> mm.add_factors(*phi)
        >>> mplp = Mplp(mm)
        >>> mplp.find_triangles()
        """
        return [
            clique for clique in nx.find_cliques(self.model)
            if len(clique) == 3
        ]
Пример #8
0
    def _query(self, variables, operation, evidence=None):
        """
        This is a generalized query method that can be used for both query and map query.

        It calibrates the junction tree if needed, extracts a subtree whose
        cliques cover the query and evidence variables, attaches the
        calibrated beliefs to that subtree as factors, and runs variable
        elimination on it.

        Parameters
        ----------
        variables: list
            list of variables for which you want to compute the probability.
            A value that is not a list, tuple or set is treated as a single
            variable.
        operation: str ('marginalize' | 'maximize')
            The operation to do for passing messages between nodes.
        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence

        Returns
        -------
        The result of ``VariableElimination.query`` when operation is
        'marginalize', or of ``VariableElimination.map_query`` when it is
        'maximize'. Any other value of ``operation`` falls through both
        branches and returns None.

        Examples
        --------
        >>> from pgm.inference import BeliefPropagation
        >>> from pgm.models import BayesianModel
        >>> import numpy as np
        >>> import pandas as pd
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> model.fit(values)
        >>> inference = BeliefPropagation(model)
        >>> phi_query = inference.query(['A', 'B'])

        References
        ----------
        Algorithm 10.4 Out-of-clique inference in clique tree
        Probabilistic Graphical Models: Principles and Techniques Daphne Koller and Nir Friedman.
        """

        is_calibrated = self._is_converged(operation=operation)
        # Calibrate the junction tree if not calibrated
        if not is_calibrated:
            self.calibrate()

        # Normalize `variables` into a fresh list so that it can be extended
        # without mutating the caller's object.
        if not isinstance(variables, (list, tuple, set)):
            query_variables = [variables]
        else:
            query_variables = list(variables)
        # Evidence variables must be covered by the subtree as well.
        query_variables.extend(evidence.keys() if evidence else [])

        # Find a tree T' such that query_variables are a subset of scope(T')
        nodes_with_query_variables = set()
        for var in query_variables:
            nodes_with_query_variables.update(
                filter(lambda x: var in x, self.junction_tree.nodes()))
        # subtree_nodes is the same set object; it keeps growing below while
        # nodes_with_query_variables is rebound to an immutable snapshot.
        subtree_nodes = nodes_with_query_variables

        # Conversion of set to tuple just for indexing
        nodes_with_query_variables = tuple(nodes_with_query_variables)
        # As the junction tree is a tree, there is only one path between any
        # two nodes, so we can take the path between consecutive nodes of the
        # snapshot regardless of their order.
        for i in range(len(nodes_with_query_variables) - 1):
            subtree_nodes.update(
                nx.shortest_path(self.junction_tree,
                                 nodes_with_query_variables[i],
                                 nodes_with_query_variables[i + 1]))
        subtree_undirected_graph = self.junction_tree.subgraph(subtree_nodes)
        # Converting subtree into a junction tree
        if len(subtree_nodes) == 1:
            # A single node is added explicitly instead of going through the
            # edge list (presumably because a one-node subgraph has no edges
            # to build the tree from).
            subtree = JunctionTree()
            subtree.add_node(subtree_nodes.pop())
        else:
            subtree = JunctionTree(subtree_undirected_graph.edges())

        # Selecting a node as the root node. The root node is one that has
        # only one neighbor.
        # NOTE(review): this indexes the result of nodes() and takes len() of
        # the result of neighbors(); confirm both are sequences in the
        # networkx version in use.
        if len(subtree.nodes()) == 1:
            root_node = subtree.nodes()[0]
        else:
            root_node = tuple(
                filter(lambda x: len(subtree.neighbors(x)) == 1,
                       subtree.nodes()))[0]
        clique_potential_list = [self.clique_beliefs[root_node]]

        # For the other nodes in the subtree compute the clique potentials as
        # follows. All the nodes are tuples, so a plain set(root_node) won't
        # work as it would fill the set with the elements of the tuple;
        # set([root_node]) is used instead so that the set contains the tuple
        # itself, not its members.
        parent_nodes = set([root_node])
        nodes_traversed = set()
        # Walk the subtree outwards from the root: every not-yet-traversed
        # neighbor contributes its clique belief divided by the belief of the
        # sepset it shares with the node it was reached from.
        while parent_nodes:
            parent_node = parent_nodes.pop()
            for child_node in set(
                    subtree.neighbors(parent_node)) - nodes_traversed:
                clique_potential_list.append(
                    self.clique_beliefs[child_node] /
                    self.sepset_beliefs[frozenset([parent_node, child_node])])
                parent_nodes.update([child_node])
            nodes_traversed.update([parent_node])

        # Add factors to the corresponding junction tree
        subtree.add_factors(*clique_potential_list)

        # Variable elimination on the subtree. The caller's original
        # `variables` is passed on (not query_variables); evidence is
        # forwarded separately.
        variable_elimination = VariableElimination(subtree)
        if operation == 'marginalize':
            return variable_elimination.query(variables=variables,
                                              evidence=evidence)
        elif operation == 'maximize':
            return variable_elimination.map_query(variables=variables,
                                                  evidence=evidence)