Exemplo n.º 1
0
    def get_partition_function(self):
        r"""
        Returns the partition function for a given undirected graph.

        A partition function is defined as

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        where m is the number of factors present in the graph
        and X are all the random variables present.

        Returns
        -------
        The result of ``np.sum`` over the values of the product of all the
        factors of the model.

        Raises
        ------
        ValueError
            If the scope of the factor product does not match the set of
            nodes of the graph.

        Examples
        --------
        >>> import numpy as np
        >>> from pgm.models import MarkovModel
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> G = MarkovModel()
        >>> G.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> G.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                   ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                   ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in G.edges()]
        >>> G.add_factors(*phi)
        >>> G.get_partition_function()
        """
        # The docstring is a raw string: it contains LaTeX backslashes
        # (\sum, \prod, \phi) that are not valid Python escape sequences.
        self.check_model()

        # Multiply every factor of the model into a single joint factor.
        factor = factor_product(self.factors[0], *self.factors[1:])

        # The product must range over exactly the variables of the graph.
        if set(factor.scope()) != set(self.nodes()):
            raise ValueError(
                'DiscreteFactor for all the random variables not defined.')

        return np.sum(factor.values)
Exemplo n.º 2
0
    def get_partition_function(self):
        r"""
        Returns the partition function for a given undirected graph.

        A partition function is defined as

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        where m is the number of factors present in the graph
        and X are all the random variables present.

        Returns
        -------
        The result of ``np.sum`` over the values of the product of all the
        factors of the graph.

        Raises
        ------
        ValueError
            If the scope of the factor product does not match the set of
            variable nodes of the graph.

        Examples
        --------
        >>> import numpy as np
        >>> from pgm.models import FactorGraph
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> G = FactorGraph()
        >>> G.add_nodes_from(['a', 'b', 'c'])
        >>> phi1 = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4))
        >>> phi2 = DiscreteFactor(['b', 'c'], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2)
        >>> G.add_nodes_from([phi1, phi2])
        >>> G.add_edges_from([('a', phi1), ('b', phi1),
        ...                   ('b', phi2), ('c', phi2)])
        >>> G.get_factors()
        >>> G.get_partition_function()
        """
        # The docstring is a raw string: it contains LaTeX backslashes
        # (\sum, \prod, \phi) that are not valid Python escape sequences.

        # Multiply every factor of the graph into a single joint factor.
        factor = factor_product(self.factors[0], *self.factors[1:])

        # The product must range over exactly the variable nodes (factor
        # nodes of the graph are not random variables).
        if set(factor.scope()) != set(self.get_variable_nodes()):
            raise ValueError(
                'DiscreteFactor for all the random variables not defined.')

        return np.sum(factor.values)
Exemplo n.º 3
0
    def _get_kernel_from_bayesian_model(self, model):
        """
        Builds the Gibbs transition models from a Bayesian Network.

        For every variable, and for every joint assignment of all the other
        variables, the product of the CPDs mentioning that variable is reduced
        to the assignment and normalised; the resulting vector is stored in
        ``self.transition_models[variable][assignment]``.

        Reference: 'Probabilistic Graphical Model Principles and Techniques',
        Koller and Friedman, Section 12.3.3 pp 512-513.

        Parameters
        ----------
        model: BayesianModel
            The model from which probabilities will be computed.
        """
        self.variables = np.array(model.nodes())

        card_by_var = {}
        for node in self.variables:
            card_by_var[node] = model.get_cpds(node).variable_card
        self.cardinalities = card_by_var

        for target in self.variables:
            # Every variable except the one whose kernel is being built, with
            # one state range per such variable.
            rest = [v for v in self.variables if target != v]
            rest_ranges = [range(self.cardinalities[v]) for v in rest]

            # Only CPDs mentioning the target variable take part.
            relevant_cpds = [
                cpd for cpd in model.cpds if target in cpd.scope()
            ]
            joint = factor_product(*relevant_cpds)
            joint_scope = set(joint.scope())

            transitions = {}
            for assignment in itertools.product(*rest_ranges):
                # Variables outside the product's scope are skipped when
                # reducing, but still take part in the dictionary key.
                observed = [
                    State(v, s)
                    for v, s in zip(rest, assignment)
                    if v in joint_scope
                ]
                conditional = joint.reduce(observed, inplace=False)
                transitions[assignment] = conditional.values / sum(
                    conditional.values)
            self.transition_models[target] = transitions
Exemplo n.º 4
0
    def get_partition_function(self):
        r"""
        Computes the partition function of the cluster graph.

        The partition function is

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        with m the number of factors in the graph and X ranging over all
        the random variables.

        Returns
        -------
        The result of ``np.sum`` over the values of the product of all the
        factors, or None when ``check_model()`` returns a falsy value.

        Examples
        --------
        >>> from pgm.models import ClusterGraph
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([('a', 'b', 'c'), ('a', 'b'), ('a', 'c')])
        >>> G.add_edges_from([(('a', 'b', 'c'), ('a', 'b')),
        ...                   (('a', 'b', 'c'), ('a', 'c'))])
        >>> phi1 = DiscreteFactor(['a', 'b', 'c'], [2, 2, 2], np.random.rand(8))
        >>> phi2 = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4))
        >>> phi3 = DiscreteFactor(['a', 'c'], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2, phi3)
        >>> G.get_partition_function()
        """
        if not self.check_model():
            return None

        head = self.factors[0]
        tail = self.factors[1:]
        joint = factor_product(head, *tail)
        return np.sum(joint.values)
Exemplo n.º 5
0
    def map_query(self, variables=None, evidence=None, elimination_order=None):
        """
        Computes the MAP Query over the variables given the evidence.

        Parameters
        ----------
        variables: list
            variables whose assignment should be returned. If empty or None,
            the whole maximising assignment is returned.
        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence
        elimination_order: list
            order of variable eliminations, forwarded unchanged to
            ``_variable_elimination``.

        Returns
        -------
        dict
            {variable: value} taken from the assignment at which the combined
            distribution attains its maximum.

        Examples
        --------
        >>> from pgm.inference import VariableElimination
        >>> from pgm.models import BayesianModel
        >>> import numpy as np
        >>> import pandas as pd
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> model.fit(values)
        >>> inference = VariableElimination(model)
        >>> phi_query = inference.map_query(['A', 'B'])
        """
        # The conditional covers the whole set difference: without evidence
        # an empty set is passed on, otherwise every non-evidence variable.
        if evidence:
            elimination_variables = set(self.variables) - set(evidence.keys())
        else:
            elimination_variables = set()

        remaining = self._variable_elimination(
            elimination_variables,
            'maximize',
            evidence=evidence,
            elimination_order=elimination_order)

        # _variable_elimination may hand back a dict; only its values (the
        # factors) are multiplied together.
        # NOTE(review): confirm which call patterns yield a dict.
        if isinstance(remaining, dict):
            remaining = remaining.values()
        joint = factor_product(*remaining)

        # Flat index of the largest entry, translated back into a list of
        # (variable, value) pairs.
        best_index = np.argmax(joint.values)
        best_assignment = joint.assignment([best_index])[0]
        full_result = {var: value for var, value in best_assignment}

        if not variables:
            return full_result
        return {var: full_result[var] for var in variables}
Exemplo n.º 6
0
    def test_junction_tree_single_clique(self):
        """A triangle over x1, x2, x3 must yield a one-node junction tree."""
        triangle = [('x1', 'x2'), ('x2', 'x3'), ('x1', 'x3')]
        self.graph.add_edges_from(triangle)

        # One random pairwise factor per edge of the graph.
        edge_factors = []
        for edge in self.graph.edges():
            edge_factors.append(
                DiscreteFactor(edge, [2, 2], np.random.rand(4)))
        self.graph.add_factors(*edge_factors)

        tree = self.graph.to_junction_tree()

        # The only node is the clique holding all three variables ...
        clique_nodes = hf.recursive_sorted(tree.nodes())
        self.assertListEqual(clique_nodes, [['x1', 'x2', 'x3']])

        # ... and its potential equals the product of all edge factors.
        self.assertEqual(tree.get_factors()[0],
                         factor_product(*edge_factors))
Exemplo n.º 7
0
    def _get_factor(self, belief_prop, evidence):
        """
        Extracts the required factor from the junction tree.

        The factors of ``belief_prop.junction_tree`` are multiplied together
        and the product is reduced on every evidence variable found in its
        scope.

        Parameters
        ----------
        belief_prop: Belief Propagation
            Belief Propagation object whose junction tree supplies the factors.

        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}

        Returns
        -------
        The product factor, after the evidence reductions.
        """
        joint = factor_product(*belief_prop.junction_tree.get_factors())
        if not evidence:
            return joint

        for var in evidence:
            if var not in joint.scope():
                continue
            # The return value is discarded, so this relies on reduce()
            # modifying the factor in place -- presumably its default; verify.
            joint.reduce([(var, evidence[var])])
        return joint
Exemplo n.º 8
0
    def max_marginal(self,
                     variables=None,
                     evidence=None,
                     elimination_order=None):
        """
        Computes the max-marginal over the variables given the evidence.

        Parameters
        ----------
        variables: list
            list of variables over which we want to compute the max-marginal.
            None (or empty) is treated as an empty list.
        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence
        elimination_order: list
            order of variable eliminations, forwarded unchanged to
            ``_variable_elimination``.

        Returns
        -------
        The largest entry (``np.max``) of the product of the factors returned
        by the elimination.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgm.models import BayesianModel
        >>> from pgm.inference import VariableElimination
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> model.fit(values)
        >>> inference = VariableElimination(model)
        >>> phi_query = inference.max_marginal(['A', 'B'])
        """
        kept = variables if variables else []
        remaining = self._variable_elimination(
            kept,
            'maximize',
            evidence=evidence,
            elimination_order=elimination_order)

        # _variable_elimination may hand back a dict; only its values (the
        # factors) are multiplied together.
        # NOTE(review): confirm which call patterns yield a dict.
        if isinstance(remaining, dict):
            remaining = remaining.values()

        combined = factor_product(*remaining)
        return np.max(combined.values)
Exemplo n.º 9
0
    def _get_kernel_from_markov_model(self, model):
        """
        Computes the Gibbs transition models from a Markov Network.
        'Probabilistic Graphical Model Principles and Techniques', Koller and
        Friedman, Section 12.3.3 pp 512-513.

        For every variable, and for every joint assignment of all the other
        variables, the product of the factors mentioning that variable is
        reduced to the assignment and normalised; the resulting vector is
        stored in ``self.transition_models[variable][assignment]``.

        Parameters
        ----------
        model: MarkovModel
            The model from which probabilities will be computed.
        """
        self.variables = np.array(model.nodes())

        # Group the factors by the variables appearing in their scope.
        factors_dict = {var: [] for var in self.variables}
        for factor in model.get_factors():
            for var in factor.scope():
                factors_dict[var].append(factor)

        # Take factor product
        factors_dict = {
            var: factor_product(*factors) if len(factors) > 1 else factors[0]
            for var, factors in factors_dict.items()
        }
        self.cardinalities = {
            var: factors_dict[var].get_cardinality([var])[var]
            for var in self.variables
        }

        for var in self.variables:
            other_vars = [v for v in self.variables if var != v]
            other_cards = [self.cardinalities[v] for v in other_vars]
            kernel = {}
            factor = factors_dict[var]
            scope = set(factor.scope())
            for tup in itertools.product(
                    *[range(card) for card in other_cards]):
                # The comprehension variable is deliberately NOT called
                # `var`: on Python 2 list-comprehension variables leak into
                # the enclosing scope, which would overwrite the outer loop
                # variable and store the kernel under the wrong key below.
                states = [
                    State(v, s) for v, s in zip(other_vars, tup)
                    if v in scope
                ]
                reduced_factor = factor.reduce(states, inplace=False)
                kernel[tup] = reduced_factor.values / sum(
                    reduced_factor.values)
            self.transition_models[var] = kernel
Exemplo n.º 10
0
    def to_junction_tree(self):
        """
        Creates a junction tree (or clique tree) for a given markov model.

        For a given markov model (H) a junction tree (G) is a graph
        1. where each node in G corresponds to a maximal clique in H
        2. each sepset in G separates the variables strictly on one side of the
        edge to other.

        Returns
        -------
        JunctionTree
            Tree whose nodes are the maximal cliques of the triangulated
            graph, with one clique potential added per node.

        Raises
        ------
        ValueError
            If the factors do not cover all the nodes of the model, or if
            some factor could not be assigned to any clique.

        Examples
        --------
        >>> import numpy as np
        >>> from pgm.models import MarkovModel
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> mm = MarkovModel()
        >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                    ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
        >>> mm.add_factors(*phi)
        >>> junction_tree = mm.to_junction_tree()
        """
        from pgm.models import JunctionTree

        # Check whether the model is valid or not
        self.check_model()

        # Triangulate the graph to make it chordal
        triangulated_graph = self.triangulate()

        # Find maximal cliques in the chordal graph
        cliques = list(map(tuple, nx.find_cliques(triangulated_graph)))

        if len(cliques) >= 2:
            # Build a complete graph over the cliques whose edge weights are
            # the NEGATED sepset sizes, so that a minimum spanning tree of it
            # is a maximum-sepset spanning tree.
            complete_graph = UndirectedGraph()
            for first, second in itertools.combinations(cliques, 2):
                sepset_size = len(set(first).intersection(second))
                complete_graph.add_edge(first, second, weight=-sepset_size)

            clique_trees = JunctionTree(
                nx.minimum_spanning_tree(complete_graph).edges())
        else:
            # With a single clique the tree is just that one node; with no
            # clique at all the tree stays empty instead of the name being
            # left unbound.
            clique_trees = JunctionTree()
            if cliques:
                clique_trees.add_node(cliques[0])

        # Check whether the factors are defined for all the random variables or not
        all_vars = itertools.chain(
            *[factor.scope() for factor in self.factors])
        if set(all_vars) != set(self.nodes()):
            # The exception used to be constructed but never raised.
            raise ValueError(
                'DiscreteFactor for all the random variables not specified')

        # Dictionary stating whether the factor is used to create clique
        # potential or not
        # If false, then it is not used to create any clique potential
        is_used = {factor: False for factor in self.factors}

        # The cardinalities do not change while the potentials are built, so
        # they are looked up once rather than per variable of every clique.
        cardinalities = self.get_cardinality()

        for node in clique_trees.nodes():
            clique_factors = []
            for factor in self.factors:
                # A factor is assigned to the first clique that contains its
                # whole scope, and to that clique only.
                if not is_used[factor] and set(factor.scope()).issubset(node):
                    clique_factors.append(factor)
                    is_used[factor] = True

            # To compute clique potential, initially set it as unity factor
            var_card = [cardinalities[x] for x in node]
            clique_potential = DiscreteFactor(node, var_card,
                                              np.ones(np.prod(var_card)))
            # multiply it with the factors associated with the variables present
            # in the clique (or node); a clique that received no factor keeps
            # the unity potential.
            if clique_factors:
                clique_potential *= factor_product(*clique_factors)
            clique_trees.add_factors(clique_potential)

        if not all(is_used.values()):
            raise ValueError(
                'All the factors were not used to create Junction Tree. '
                'Extra factors are defined.')

        return clique_trees
Exemplo n.º 11
0
    def _variable_elimination(self,
                              variables,
                              operation,
                              evidence=None,
                              elimination_order=None):
        """
        Implementation of a generalized variable elimination.

        Parameters
        ----------
        variables: list, array-like
            variables that are not to be eliminated.
        operation: str ('marginalize' | 'maximize')
            The operation to do for eliminating the variable.
        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence
        elimination_order: list, array-like
            list of variables representing the order in which they
            are to be eliminated. If None order is computed automatically.

        Returns
        -------
        set or dict
            When `variables` is empty, the set of all factors of the model
            (nothing is eliminated). Otherwise a dict mapping every query
            variable to the factor obtained by marginalizing the remaining
            distribution onto that variable and normalizing it.

        Raises
        ------
        TypeError
            If `variables` or `evidence` is a plain string.
        ValueError
            If `elimination_order` contains a query or evidence variable.
        """
        if isinstance(variables, string_types):
            raise TypeError("variables must be a list of strings")
        if isinstance(evidence, string_types):
            raise TypeError("evidence must be a dict of {variable: state}")

        # Dealing with the case when variables is not provided.
        if not variables:
            all_factors = set()
            for factor_li in self.factors.values():
                all_factors.update(factor_li)
            return all_factors

        eliminated_variables = set()
        # Private copy of the variable -> factors index, so that the model's
        # own containers are never mutated.
        working_factors = {
            node: set(self.factors[node])
            for node in self.factors
        }
        evidence_vars = set(evidence.keys()) if evidence else set()

        # Dealing with evidence. Reducing factors over it before VE is run.
        if evidence:
            for evidence_var in evidence:
                for factor in working_factors[evidence_var]:
                    factor_reduced = factor.reduce(
                        [(evidence_var, evidence[evidence_var])],
                        inplace=False)
                    # The reduced factor no longer has evidence_var in its
                    # scope, so the set being iterated is not modified here.
                    for var in factor_reduced.scope():
                        working_factors[var].remove(factor)
                        working_factors[var].add(factor_reduced)
                del working_factors[evidence_var]

        # TODO: Modify it to find the optimal elimination order
        if not elimination_order:
            elimination_order = list(
                set(self.variables) - set(variables) - evidence_vars)

        elif any(var in elimination_order
                 for var in set(variables).union(evidence_vars)):
            raise ValueError(
                "Elimination order contains variables which are in"
                " variables or evidence args")

        for var in elimination_order:
            # Removing all the factors containing the variables which are
            # eliminated (as all the factors should be considered only once)
            factors = [
                factor for factor in working_factors[var]
                if not set(factor.variables).intersection(eliminated_variables)
            ]
            phi = factor_product(*factors)
            phi = getattr(phi, operation)([var], inplace=False)
            del working_factors[var]
            for variable in phi.variables:
                working_factors[variable].add(phi)
            eliminated_variables.add(var)

        # Collect every surviving factor that mentions no eliminated variable.
        final_distribution = {
            factor
            for factors in working_factors.values()
            for factor in factors
            if not set(factor.variables).intersection(eliminated_variables)
        }

        # The joint over the query variables is the same for every query
        # variable, so it is multiplied out once instead of once per variable.
        joint = factor_product(*final_distribution)
        query_variables = set(variables)
        query_var_factor = {}
        for query_var in variables:
            # Always `marginalize` here, whatever `operation` was used for
            # the elimination itself.
            query_var_factor[query_var] = joint.marginalize(
                list(query_variables - {query_var}),
                inplace=False).normalize(inplace=False)
        return query_var_factor