Пример #1
0
    def get_cardinality(self, check_cardinality=False):
        """
        Collect the cardinality of every variable that appears in a factor.

        Each factor in ``self.factors`` contributes its scope variables and
        their cardinalities; when a variable appears in several factors the
        value from the last factor visited is kept.

        Parameters
        ----------
        check_cardinality: boolean, optional
            If True, the number of variables found in the factors is compared
            with the number of nodes of the model and an error is raised when
            the two counts differ.

        Returns
        -------
        defaultdict
            Maps variable -> cardinality (the default factory is ``int``).

        Raises
        ------
        ValueError
            If ``check_cardinality`` is True and the counts differ.

        Examples
        --------
        >>> from pgm.models import MarkovModel
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> student = MarkovModel([('Alice', 'Bob'), ('Bob', 'Charles')])
        >>> factor = DiscreteFactor(['Alice', 'Bob'], cardinality=[2, 2],
        ...                 values=np.random.rand(4))
        >>> student.add_factors(factor)
        >>> student.get_cardinality()
        defaultdict(<class 'int'>, {'Bob': 2, 'Alice': 2})
        """
        card_by_var = defaultdict(int)
        for phi in self.factors:
            # Later factors overwrite earlier entries for shared variables.
            card_by_var.update(zip(phi.scope(), phi.cardinality))

        if not check_cardinality:
            return card_by_var

        if len(self.nodes()) != len(card_by_var):
            raise ValueError('Factors for all the variables not defined')
        return card_by_var
Пример #2
0
    def get_cardinality(self, check_cardinality=False):
        """
        Collect the cardinality of every variable that appears in a factor.

        Each factor in ``self.factors`` contributes its scope variables and
        their cardinalities; when a variable appears in several factors the
        value from the last factor visited is kept.

        Parameters
        ----------
        check_cardinality: boolean, optional
            If True, the number of variables found in the factors is compared
            with the number of variable nodes of the graph and an error is
            raised when the two counts differ.

        Returns
        -------
        defaultdict
            Maps variable -> cardinality (the default factory is ``int``).

        Raises
        ------
        ValueError
            If ``check_cardinality`` is True and the counts differ.

        Examples
        --------
        >>> from pgm.models import FactorGraph
        >>> from pgm.factors import DiscreteFactor
        >>> G = FactorGraph()
        >>> G.add_nodes_from(['a', 'b', 'c'])
        >>> phi1 = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4))
        >>> phi2 = DiscreteFactor(['b', 'c'], [2, 2], np.random.rand(4))
        >>> G.add_nodes_from([phi1, phi2])
        >>> G.add_edges_from([('a', phi1), ('b', phi1),
        ...                   ('b', phi2), ('c', phi2)])
        >>> G.add_factors(phi1, phi2)
        >>> G.get_cardinality()
        defaultdict(<class 'int'>, {'c': 2, 'b': 2, 'a': 2})
        """
        card_by_var = defaultdict(int)
        for phi in self.factors:
            # Later factors overwrite earlier entries for shared variables.
            card_by_var.update(zip(phi.scope(), phi.cardinality))

        if not check_cardinality:
            return card_by_var

        if len(self.get_variable_nodes()) != len(card_by_var):
            raise ValueError('Factors for all the variables not defined')
        return card_by_var
Пример #3
0
    def check_model(self):
        """
        Validate the factors attached to the model.

        Two checks are run for every factor in ``self.factors``:

        * the cardinality the factor declares for each variable in its scope
          equals the one reported by ``self.get_cardinality()``;
        * every pair of variables in the factor is connected by an edge,
          i.e. the second variable is among ``self.neighbors(first)``.

        Returns
        -------
        check: boolean
            True if all the checks are passed

        Raises
        ------
        ValueError
            On the first cardinality mismatch or the first pair of factor
            variables that are not neighbours in the model.
        """
        known_cards = self.get_cardinality()
        for phi in self.factors:
            for var, card in zip(phi.scope(), phi.cardinality):
                if known_cards[var] == card:
                    continue
                raise ValueError(
                    'Cardinality of variable {var} not matching among factors'
                    .format(var=var))

            for first, second in itertools.combinations(phi.variables, 2):
                if second in self.neighbors(first):
                    continue
                raise ValueError(
                    "DiscreteFactor inconsistent with the model.")
        return True
Пример #4
0
    def __init__(self, variables=None, card=None, start_state=None):
        """
        Set up the variables, their cardinalities and the starting state.

        Parameters
        ----------
        variables: array-like iterable object
            A list of variables of the model. Defaults to an empty list.

        card: array-like iterable object
            A list of cardinalities of the variables, paired with
            ``variables`` by position. Defaults to an empty list.

        start_state: array-like iterable object
            List of tuples representing the starting states of the variables.
            It is stored on ``self.state`` only when it is None or when
            ``self._check_state(start_state)`` returns a truthy value.

        Raises
        ------
        ValueError
            If ``variables`` or ``card`` is a string or is not iterable.
        """
        variables = [] if variables is None else variables
        card = [] if card is None else card

        variables_ok = (hasattr(variables, '__iter__') and
                        not isinstance(variables, six.string_types))
        if not variables_ok:
            raise ValueError('variables must be a non-string iterable.')

        card_ok = (hasattr(card, '__iter__') and
                   not isinstance(card, six.string_types))
        if not card_ok:
            raise ValueError('card must be a non-string iterable.')

        self.variables = variables
        self.cardinalities = dict(zip(variables, card))
        # Every variable starts with its own, initially empty, transition model.
        self.transition_models = {name: {} for name in variables}

        if start_state is not None and not self._check_state(start_state):
            # The state failed validation: self.state is deliberately not set.
            return
        self.state = start_state
Пример #5
0
    def check_model(self):
        """
        Check the model for various errors. This method checks for the
        following errors.

        * Checks that, for every node (clique) of the model, at least one
          factor in ``self.factors`` has exactly that clique as its scope.
        * Checks that the cardinality each factor declares for a variable
          equals the one reported by ``self.get_cardinality()``.

        Returns
        -------
        check: boolean
            True if all the checks are passed

        Raises
        ------
        ValueError
            If some clique has no factor defined over it, or if the
            cardinality of a variable differs among factors.
        """
        for clique in self.nodes():
            clique_vars = set(clique)
            # Test for the *existence* of a matching factor. Evaluating the
            # comparison inside any() keeps the result independent of the
            # truthiness of the factor objects themselves.
            if not any(set(factor.scope()) == clique_vars
                       for factor in self.factors):
                raise ValueError(
                    'Factors for all the cliques or clusters not defined.')

        cardinalities = self.get_cardinality()
        for factor in self.factors:
            for variable, cardinality in zip(factor.scope(),
                                             factor.cardinality):
                if cardinalities[variable] != cardinality:
                    raise ValueError(
                        'Cardinality of variable {var} not matching among factors'
                        .format(var=variable))
        return True
Пример #6
0
    def __repr__(self):
        """
        Return a one-line summary of the CPD.

        The summary has the form
        ``<TabularCPD representing P(var:card | ev1:card1, ...) at 0x...>``;
        the `` | ...`` part is omitted when there are no evidence variables.
        """
        head = '<TabularCPD representing P({var}:{card}'.format(
            var=self.variable, card=self.variable_card)

        # Everything after the first entry describes the evidence variables.
        parents = self.variables[1:]
        parent_cards = self.cardinality[1:]

        tail = ''
        if parents:
            pairs = ('{var}:{card}'.format(var=name, card=size)
                     for name, size in zip(parents, parent_cards))
            tail = ' | ' + ', '.join(pairs)

        closing = ') at {address}>'.format(address=hex(id(self)))
        return head + tail + closing
Пример #7
0
    def check_model(self):
        """
        Check the model for various errors. This method checks for the
        following errors, in this order:

        * Every node that is not in the scope of some factor must be a
          ``DiscreteFactor`` instance (otherwise a variable node has no
          factor associated with it).
        * The graph must be bipartite, with the variables forming one of the
          two node sets, so that edges only join variables and factors.
        * The number of factor nodes must equal the number of factors added
          to the model.
        * The cardinality each factor declares for a variable must equal the
          one reported by ``self.get_cardinality()``.

        Returns
        -------
        check: boolean
            True if all the checks are passed

        Raises
        ------
        ValueError
            On the first check that fails.
        """
        variable_nodes = {x for factor in self.factors for x in factor.scope()}
        factor_nodes = set(self.nodes()) - variable_nodes

        if not all(
                isinstance(factor_node, DiscreteFactor)
                for factor_node in factor_nodes):
            raise ValueError(
                'Factors not associated for all the random variables')

        # The node-set test was previously or-ed with an identical copy of
        # itself; a single call gives the same answer without traversing
        # the graph twice.
        if (not bipartite.is_bipartite(self) or
                not bipartite.is_bipartite_node_set(self, variable_nodes)):
            raise ValueError('Edges can only be between variables and factors')

        if len(factor_nodes) != len(self.factors):
            raise ValueError(
                'Factors not associated with all the factor nodes.')

        cardinalities = self.get_cardinality()
        for factor in self.factors:
            for variable, cardinality in zip(factor.scope(),
                                             factor.cardinality):
                if cardinalities[variable] != cardinality:
                    raise ValueError(
                        'Cardinality of variable {var} not matching among factors'
                        .format(var=variable))

        return True
Пример #8
0
    def add_variables_from(self, variables, cards):
        """
        Add several variables to the model at once.

        Each variable is paired with the cardinality at the same position
        and handed to ``self.add_variable``; pairing stops at the end of the
        shorter of the two inputs.

        Parameters
        ----------
        variables: array-like iterable object
            List of variables to be added.

        cards: array-like iterable object
            List of cardinalities of the variables to be added.

        Examples
        --------
        >>> from pgm.models import MarkovChain as MC
        >>> model = MC()
        >>> model.add_variables_from(['x', 'y'], [3, 4])
        """
        for pair in zip(variables, cards):
            self.add_variable(*pair)
Пример #9
0
    def assignment(self, index):
        """
        Returns a list of assignments for the corresponding index.

        Each index is decoded as a mixed-radix number whose digits are the
        states of the factor's variables, with the last variable varying
        fastest.

        Parameters
        ----------
        index: list, array-like
            List of indices whose assignment is to be computed

        Returns
        -------
        list: Returns a list of full assignments of all the variables of the
            factor, one ``[(variable, state), ...]`` list per index.

        Raises
        ------
        IndexError
            If any index is greater than the largest index of the factor
            (the product of the cardinalities minus one).

        Examples
        --------
        >>> import numpy as np
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> phi = DiscreteFactor(['diff', 'intel'], [2, 2], np.ones(4))
        >>> phi.assignment([1, 2])
        [[('diff', 0), ('intel', 1)], [('diff', 1), ('intel', 0)]]
        """
        index = np.array(index)

        max_possible_index = np.prod(self.cardinality) - 1
        if np.any(index > max_possible_index):
            raise IndexError("Index greater than max possible index")

        # The builtin ``int`` replaces the ``np.int`` alias, which newer
        # NumPy releases no longer provide.
        assignments = np.zeros((len(index), len(self.scope())), dtype=int)

        # Peel off one digit per variable, least-significant (last) first.
        rev_card = self.cardinality[::-1]
        for i, card in enumerate(rev_card):
            assignments[:, i] = index % card
            index = index // card

        # Digits were written in reverse; restore the variable order.
        assignments = assignments[:, ::-1]

        return [list(zip(self.variables, values)) for values in assignments]
Пример #10
0
    def to_junction_tree(self):
        """
        Creates a junction tree (or clique tree) for a given markov model.

        For a given markov model (H) a junction tree (G) is a graph
        1. where each node in G corresponds to a maximal clique in H
        2. each sepset in G separates the variables strictly on one side of the
        edge to other.

        The model is validated with ``check_model()`` and triangulated; the
        maximal cliques of the triangulated graph become the nodes of the
        tree. Each factor of the model is multiplied into the potential of
        the first clique that contains its whole scope.

        Returns
        -------
        JunctionTree
            The tree with one potential (``DiscreteFactor``) per clique.

        Raises
        ------
        ValueError
            If some variable of the model does not appear in any factor, or
            if a factor could not be assigned to any clique. Errors raised
            by ``check_model()`` propagate as well.

        Examples
        --------
        >>> from pgm.models import MarkovModel
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> mm = MarkovModel()
        >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                    ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
        >>> mm.add_factors(*phi)
        >>> junction_tree = mm.to_junction_tree()
        """
        from pgm.models import JunctionTree

        # Check whether the model is valid or not
        self.check_model()

        # Triangulate the graph to make it chordal
        triangulated_graph = self.triangulate()

        # Find maximal cliques in the chordal graph
        cliques = list(map(tuple, nx.find_cliques(triangulated_graph)))

        # If there is only 1 clique, then the junction tree formed is just a
        # clique tree with that single clique as the node
        if len(cliques) == 1:
            clique_trees = JunctionTree()
            clique_trees.add_node(cliques[0])

        # Else if the number of cliques is more than 1 then create a complete
        # graph with all the cliques as nodes and weight of the edges being
        # the length of sepset between two cliques
        elif len(cliques) >= 2:
            complete_graph = UndirectedGraph()
            for first, second in itertools.combinations(cliques, 2):
                sepset_size = len(set(first).intersection(second))
                # Weights are negated so that the *minimum* spanning tree
                # below is the tree with the largest sepsets.
                complete_graph.add_edge(first, second, weight=-sepset_size)

            # Create clique trees by minimum (or maximum) spanning tree method
            clique_trees = JunctionTree(
                nx.minimum_spanning_tree(complete_graph).edges())

        # Check whether the factors are defined for all the random variables
        # or not. The exception must actually be raised: a variable without
        # a factor has no known cardinality to build a clique potential from.
        all_vars = {var for factor in self.factors for var in factor.scope()}
        if all_vars != set(self.nodes()):
            raise ValueError(
                'DiscreteFactor for all the random variables not specified')

        # Dictionary stating whether the factor is used to create clique
        # potential or not
        # If false, then it is not used to create any clique potential
        is_used = {factor: False for factor in self.factors}

        # The cardinalities do not change while the tree is built, so they
        # are computed once instead of once per clique variable.
        cardinalities = self.get_cardinality()

        for node in clique_trees.nodes():
            clique_factors = []
            for factor in self.factors:
                # A factor is assigned to the first clique that contains its
                # whole scope, and to that clique only.
                if not is_used[factor] and set(factor.scope()).issubset(node):
                    clique_factors.append(factor)
                    is_used[factor] = True

            # To compute clique potential, initially set it as unity factor
            var_card = [cardinalities[x] for x in node]
            clique_potential = DiscreteFactor(node, var_card,
                                              np.ones(np.prod(var_card)))
            # multiply it with the factors associated with the variables present
            # in the clique (or node)
            clique_potential *= factor_product(*clique_factors)
            clique_trees.add_factors(clique_potential)

        if not all(is_used.values()):
            raise ValueError(
                'All the factors were not used to create Junction Tree.'
                'Extra factors are defined.')

        return clique_trees
Пример #11
0
 def __repr__(self):
     """
     Return a one-line summary listing every variable with its cardinality,
     in the form ``<Joint Distribution representing P(v1:c1, ...) at 0x...>``.
     """
     pairs = []
     for name, size in zip(self.variables, self.cardinality):
         pairs.append('{var}:{card}'.format(var=name, card=size))
     return "<Joint Distribution representing P({var_card}) at {address}>".format(
         var_card=", ".join(pairs), address=hex(id(self)))
Пример #12
0
 def test_add_variables_from(self, add_var):
     """
     ``add_variables_from`` must forward every (variable, cardinality) pair
     to the patched ``add_variable`` (received here as ``add_var``).
     """
     chain = MC()
     chain.add_variables_from(self.variables, self.card)

     expected = []
     for name, size in zip(self.variables, self.card):
         expected.append(call(chain, name, size))
     add_var.assert_has_calls(expected)
Пример #13
0
    def reorder_parents(self, new_order, inplace=True):
        """
        Reorder the evidence (parent) variables of the CPD.

        The first entry of ``self.variables`` is the variable the CPD is
        defined over and always stays first; only the remaining (evidence)
        variables are permuted.

        Parameters
        ----------
        new_order: list
            New ordering of the evidence variables. It must contain every
            evidence variable exactly once and nothing else. Any sequence
            (e.g. a tuple) is accepted.

        inplace: boolean
            If inplace == True the CPD itself is re-initialised with the new
            ordering and ``self.get_cpd()`` is returned; otherwise the CPD is
            left untouched and the reordered values are returned as a 2-D
            array with one row per state of the variable.

        Returns
        -------
        The result of ``self.get_cpd()`` (inplace, or when the ordering is
        already the current one), or the reordered 2-D array of values.

        Raises
        ------
        ValueError
            If the CPD has no evidence variables, or if ``new_order`` is not
            a permutation of them (missing, extra or repeated names,
            including the CPD's own variable).

        Examples
        --------
        Consider a CPD P(grade| diff, intel)

        >>> cpd = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
        ...                               [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
        ...                               [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
        ...                  evidence=['diff', 'intel'], evidence_card=[2, 3])
        >>> cpd.variables
        ['grade', 'diff', 'intel']
        >>> cpd.cardinality
        array([3, 2, 3])
        >>> _ = cpd.reorder_parents(['intel', 'diff'])
        >>> cpd.variables
        ['grade', 'intel', 'diff']
        >>> cpd.cardinality
        array([3, 3, 2])
        >>> cpd.variable
        'grade'
        >>> cpd.variable_card
        3
        """
        # Normalise to lists so that tuples compare and concatenate correctly.
        new_order = list(new_order)
        evidence = list(self.variables[1:])

        # new_order must be an exact permutation of the evidence variables.
        # Comparing against the evidence only (not all variables) rejects
        # the CPD's own variable, and the length test rejects duplicates.
        if (not evidence or len(new_order) != len(evidence) or
                set(new_order) != set(evidence)):
            raise ValueError("New order either has missing or extra arguments")

        if new_order == evidence:
            warn("Same ordering provided as current")
            return self.get_cpd()

        card_map = dict(zip(evidence, self.cardinality[1:]))
        old_pos_map = {var: pos for pos, var in enumerate(evidence)}
        # Axis 0 belongs to the CPD's own variable; evidence axes start at 1.
        trans_ord = [0] + [old_pos_map[var] + 1 for var in new_order]
        new_values = np.transpose(self.values, trans_ord)
        new_cards = [card_map[var] for var in new_order]

        if inplace:
            variables = [self.variables[0]] + new_order
            cardinality = [self.variable_card] + new_cards
            super(TabularCPD, self).__init__(variables, cardinality,
                                             new_values.flatten('C'))
            return self.get_cpd()

        return new_values.reshape(self.cardinality[0], np.prod(new_cards))
Пример #14
0
 def __repr__(self):
     """
     Return a one-line summary listing every variable with its cardinality,
     in the form ``<DiscreteFactor representing phi(v1:c1, ...) at 0x...>``.
     """
     pairs = []
     for name, size in zip(self.variables, self.cardinality):
         pairs.append('{var}:{card}'.format(var=name, card=size))
     return "<DiscreteFactor representing phi({var_card}) at {address}>".format(
         var_card=", ".join(pairs), address=hex(id(self)))