def get_factors(self, node=None):
    """
    Return the factors that have been added to the graph so far.

    If ``node`` is given, return only the factor that corresponds to that
    factor node, i.e. the first factor in ``self.factors`` whose scope is
    equal (as a set) to the neighbors of ``node``.

    Parameters
    ----------
    node: factor node of the graph (optional)
        When None (the default) all factors are returned.

    Returns
    -------
    ``self.factors`` when ``node`` is None, otherwise a single factor.

    Raises
    ------
    ValueError
        If ``node`` is not one of the nodes returned by
        ``self.get_factor_nodes()``.
    IndexError
        If ``node`` is a factor node but no factor has a scope equal to
        its neighbors.

    Examples
    --------
    >>> import numpy as np
    >>> from pgm.models import FactorGraph
    >>> from pgm.factors.discrete import DiscreteFactor
    >>> G = FactorGraph()
    >>> G.add_nodes_from(['a', 'b', 'c'])
    >>> phi1 = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4))
    >>> phi2 = DiscreteFactor(['b', 'c'], [2, 2], np.random.rand(4))
    >>> G.add_factors(phi1, phi2)
    >>> G.add_nodes_from([phi1, phi2])
    >>> G.add_edges_from([('a', phi1), ('b', phi1),
    ...                   ('b', phi2), ('c', phi2)])
    >>> G.get_factors()
    >>> G.get_factors(node=phi1)
    """
    if node is None:
        return self.factors

    if node not in self.get_factor_nodes():
        raise ValueError('Factors are not associated with the '
                         'corresponding node.')

    # The neighborhood of ``node`` is the same for every candidate factor,
    # so build the set once instead of once per factor.
    neighbor_set = set(self.neighbors(node))
    for factor in self.factors:
        if set(factor.scope()) == neighbor_set:
            return factor

    # IndexError is what the previous ``factors[0]`` lookup raised in this
    # situation; the type is kept so existing callers are unaffected.
    raise IndexError('No factor is defined over the neighbors of the '
                     'given factor node.')
def get_factors(self, node=None):
    """
    Return the factors that have been added to the graph so far.

    If ``node`` is given, return only the factor that corresponds to that
    node, i.e. the first factor in ``self.factors`` whose scope is equal
    (as a set) to the variables of ``node``.

    Parameters
    ----------
    node: iterable of variables (optional)
        A node (clique/cluster) of the graph. Element order is ignored
        because nodes and scopes are compared as sets. When None (the
        default) all factors are returned.

    Returns
    -------
    ``self.factors`` when ``node`` is None, otherwise a single factor.

    Raises
    ------
    ValueError
        If ``node`` is not present in the graph, or if it is present but
        no factor with a matching scope has been added.

    Examples
    --------
    >>> import numpy as np
    >>> from pgm.models import ClusterGraph
    >>> from pgm.factors.discrete import DiscreteFactor
    >>> G = ClusterGraph()
    >>> G.add_nodes_from([('a', 'b', 'c'), ('a', 'b'), ('a', 'c')])
    >>> G.add_edges_from([(('a', 'b', 'c'), ('a', 'b')),
    ...                   (('a', 'b', 'c'), ('a', 'c'))])
    >>> phi1 = DiscreteFactor(['a', 'b', 'c'], [2, 2, 2], np.random.rand(8))
    >>> phi2 = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4))
    >>> phi3 = DiscreteFactor(['a', 'c'], [2, 2], np.random.rand(4))
    >>> G.add_factors(phi1, phi2, phi3)
    >>> G.get_factors()
    >>> G.get_factors(node=('a', 'b', 'c'))
    """
    if node is None:
        return self.factors

    # Compare as sets so that ('a', 'b') and ('b', 'a') name the same node.
    target = set(node)
    if not any(set(existing) == target for existing in self.nodes()):
        raise ValueError('Node not present in Cluster Graph')

    for factor in self.factors:
        if set(factor.scope()) == target:
            return factor

    # Raise explicitly: letting ``next()`` on an exhausted iterator
    # propagate would surface as a bare StopIteration to the caller.
    raise ValueError('No factor is defined for the given node')
def check_model(self):
    """
    Check the model for various errors.

    This method checks for the following errors.

    * Checks if factors are defined for all the cliques or not.
    * Check for running intersection property is not done explicitly over
      here as it is done in the add_edges method.
    * Check if cardinality of random variable remains same across all the
      factors.

    Returns
    -------
    check: boolean
        True if all the checks are passed

    Raises
    ------
    ValueError
        If some clique has no factor whose scope equals it (as a set), or
        if a factor's cardinality for a variable differs from the value
        reported by ``self.get_cardinality()``.
    """
    # Collect every factor scope once. Membership is then a hash lookup,
    # and the check no longer depends on the truthiness of factor objects
    # (a factor that evaluates to False must still count as defined).
    defined_scopes = {frozenset(factor.scope()) for factor in self.factors}
    for clique in self.nodes():
        if frozenset(clique) not in defined_scopes:
            raise ValueError(
                'Factors for all the cliques or clusters not defined.')

    cardinalities = self.get_cardinality()
    for factor in self.factors:
        for variable, cardinality in zip(factor.scope(),
                                         factor.cardinality):
            if cardinalities[variable] != cardinality:
                raise ValueError(
                    'Cardinality of variable {var} not matching among factors'
                    .format(var=variable))
    return True
def induced_graph(self, elimination_order):
    """
    Build the induced graph obtained by running Variable Elimination on
    the network with the given elimination order.

    Parameters
    ----------
    elimination_order: list, array like
        The variables, in the order in which they are eliminated. It must
        contain exactly the variables in ``self.variables``.

    Returns
    -------
    An ``nx.Graph`` with an edge between every pair of variables that
    share a clique, either in an original factor scope or in a scope
    created while eliminating.

    Raises
    ------
    ValueError
        If the variables in ``elimination_order`` differ from
        ``self.variables``.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgm.models import BayesianModel
    >>> from pgm.inference import VariableElimination
    >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
    ...                       columns=['A', 'B', 'C', 'D', 'E'])
    >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
    >>> model.fit(values)
    >>> inference = VariableElimination(model)
    >>> inference.induced_graph(['C', 'D', 'A', 'B', 'E'])
    <networkx.classes.graph.Graph at 0x7f34ac8c5160>
    """
    # The elimination order has to cover exactly the model's variables.
    if set(elimination_order) != set(self.variables):
        raise ValueError("Set of variables in elimination order"
                         " different from variables in model")

    # For every variable, the scopes of the factors it takes part in.
    scopes_by_variable = {}
    for node in self.factors:
        scopes_by_variable[node] = [phi.scope() for phi in self.factors[node]]

    # Cliques of the induced graph; seeded with the original scopes.
    induced_cliques = set()
    for scopes in scopes_by_variable.values():
        for scope in scopes:
            induced_cliques.add(tuple(scope))

    already_eliminated = set()
    for var in elimination_order:
        # Scopes touching an eliminated variable were already consumed,
        # so each factor is taken into account only once.
        live_scopes = [
            scope for scope in scopes_by_variable[var]
            if already_eliminated.isdisjoint(scope)
        ]
        merged_scope = set(itertools.chain.from_iterable(live_scopes)) - {var}
        induced_cliques.add(tuple(merged_scope))

        del scopes_by_variable[var]
        for other in merged_scope:
            scopes_by_variable[other].append(list(merged_scope))
        already_eliminated.add(var)

    # Only cliques with at least two variables contribute edges.
    edges = []
    for clique in induced_cliques:
        if len(clique) > 1:
            edges.extend(itertools.combinations(clique, 2))
    return nx.Graph(edges)
def test_factorset_marginalize_not_inplace(self):
    """
    ``marginalize(..., inplace=False)`` returns a new factor set in which
    each factor equals the matching original factor marginalized on its
    own.
    """
    factor_set = FactorSet(self.phi1, self.phi2, self.phi3, self.phi4)
    new_factor_set = factor_set.marginalize(['x1', 'x5'], inplace=False)

    # (original factor, variable summed out of it, scope left afterwards)
    expectations = (
        (self.phi1, 'x1', {'x2', 'x3'}),
        (self.phi2, 'x1', {'x4', 'x3'}),
        (self.phi3, 'x5', {'x6', 'x7'}),
        (self.phi4, 'x5', {'x8', 'x7'}),
    )
    for original, summed_out, remaining_scope in expectations:
        candidates = [
            phi for phi in new_factor_set.factors
            if set(phi.scope()) == remaining_scope
        ]
        equivalent_in_factor_set = candidates[0]
        self.assertEqual(original.marginalize([summed_out], inplace=False),
                         equivalent_in_factor_set)
def marginalize(self, variables, inplace=True):
    """
    Marginalize the factors present in the factor set with respect to the
    given variables.

    Only factors whose scope contains at least one of ``variables`` are
    touched, and each is marginalized over the given variables that are
    in its scope.

    Parameters
    ----------
    variables: list, array-like
        List of the variables to be marginalized. A plain string is
        rejected; wrap a single variable in a list.

    inplace: boolean (Default value True)
        If inplace=True it will modify the factor set itself, else it
        would create a new factor set.

    Returns
    -------
    If inplace = False, will return a new marginalized FactorSet object;
    otherwise None.

    Raises
    ------
    TypeError
        If ``variables`` is a string.

    Examples
    --------
    >>> from pgm.factors import FactorSet
    >>> from pgm.factors.discrete import DiscreteFactor
    >>> phi1 = DiscreteFactor(['x1', 'x2', 'x3'], [2, 3, 2], range(12))
    >>> phi2 = DiscreteFactor(['x3', 'x4', 'x1'], [2, 2, 2], range(8))
    >>> factor_set1 = FactorSet(phi1, phi2)
    >>> factor_set1.marginalize(['x1'])
    >>> print(factor_set1)
    set([<DiscreteFactor representing phi(x2:3, x3:2) at 0x7f8e32b4cc10>,
         <DiscreteFactor representing phi(x3:2, x4:2) at 0x7f8e32b4cf90>])
    """
    if isinstance(variables, six.string_types):
        raise TypeError('Expected list or array-like type got type str')

    # Materialize once: ``variables`` is consulted for every factor, which
    # would silently stop working after the first pass over a one-shot
    # iterable.
    variables = set(variables)
    factor_set = self if inplace else self.copy()

    # Snapshot the affected factors before the loop starts changing the
    # membership of ``factor_set.factors``.
    affected_factors = set(
        factor for factor in factor_set.factors
        if variables.intersection(factor.scope()))

    for factor in affected_factors:
        to_sum_out = [var for var in factor.scope() if var in variables]
        # NOTE(review): the factors appear to live in a hash-based
        # container (the example output above prints a set). Take the
        # factor out before changing it so that it is never modified while
        # stored there -- confirm against remove_factors/add_factors.
        factor_set.remove_factors(factor)
        if inplace:
            # Same object is mutated and put back, so references held by
            # callers keep observing the marginalized factor.
            factor.marginalize(to_sum_out, inplace=True)
            factor_set.add_factors(factor)
        else:
            factor_set.add_factors(
                factor.marginalize(to_sum_out, inplace=False))

    if not inplace:
        return factor_set
def find_triangles(self):
    """
    Find the triangles present in the given model.

    The triangles are the cliques enumerated by ``nx.find_cliques`` on
    ``self.model`` that contain exactly three nodes. ``nx.find_cliques``
    yields maximal cliques, so a triple of nodes that is part of a larger
    clique is not reported.

    Returns
    -------
    list
        One entry (the clique's nodes) per triangle found.

    Examples
    --------
    >>> import numpy as np
    >>> from pgm.models import MarkovModel
    >>> from pgm.factors.discrete import DiscreteFactor
    >>> from pgm.inference import Mplp
    >>> mm = MarkovModel()
    >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
    >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
    ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
    ...                    ('x4', 'x7'), ('x5', 'x7')])
    >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
    >>> mm.add_factors(*phi)
    >>> mplp = Mplp(mm)
    >>> mplp.find_triangles()
    """
    return [
        clique for clique in nx.find_cliques(self.model)
        if len(clique) == 3
    ]
def _query(self, variables, operation, evidence=None):
    """
    This is a generalized query method that can be used for both query and
    map query.

    The junction tree is calibrated if needed, the smallest connected part
    of it that covers the query and evidence variables is extracted, the
    calibrated beliefs are turned into factors of that subtree, and
    variable elimination is run on the result.

    Parameters
    ----------
    variables: list
        list of variables for which you want to compute the probability
    operation: str ('marginalize' | 'maximize')
        The operation to do for passing messages between nodes.
    evidence: dict
        a dict key, value pair as {var: state_of_var_observed}
        None if no evidence

    Returns
    -------
    The result of ``VariableElimination.query`` for 'marginalize' or of
    ``VariableElimination.map_query`` for 'maximize', both run on the
    extracted subtree. For any other ``operation`` that reaches the end of
    this method, nothing is returned (None).

    Examples
    --------
    >>> from pgm.inference import BeliefPropagation
    >>> from pgm.models import BayesianModel
    >>> import numpy as np
    >>> import pandas as pd
    >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
    ...                       columns=['A', 'B', 'C', 'D', 'E'])
    >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
    >>> model.fit(values)
    >>> inference = BeliefPropagation(model)
    >>> phi_query = inference.query(['A', 'B'])

    References
    ----------
    Algorithm 10.4 Out-of-clique inference in clique tree
    Probabilistic Graphical Models: Principles and Techniques Daphne Koller and Nir Friedman.
    """
    # Calibrate the junction tree if it is not calibrated yet
    if not self._is_converged(operation=operation):
        self.calibrate()

    if not isinstance(variables, (list, tuple, set)):
        query_variables = [variables]
    else:
        query_variables = list(variables)
    query_variables.extend(evidence.keys() if evidence else [])

    # Find a tree T' such that query_variables are a subset of scope(T'):
    # start from every clique that mentions one of the variables.
    subtree_nodes = set()
    for var in query_variables:
        subtree_nodes.update(
            clique for clique in self.junction_tree.nodes() if var in clique)

    # Tuple snapshot of the starting cliques, needed only for indexing
    # while ``subtree_nodes`` keeps growing below.
    anchor_nodes = tuple(subtree_nodes)

    # A junction tree is a tree, so there is exactly one path between any
    # two nodes. Adding the path between each pair of consecutive anchors
    # (in whatever order they come) connects all of them.
    for first, second in zip(anchor_nodes, anchor_nodes[1:]):
        subtree_nodes.update(
            nx.shortest_path(self.junction_tree, first, second))

    subtree_undirected_graph = self.junction_tree.subgraph(subtree_nodes)

    # Converting the subtree into a junction tree. A single clique has no
    # edges to build the tree from, so it is added as a node explicitly.
    if len(subtree_nodes) == 1:
        subtree = JunctionTree()
        subtree.add_node(subtree_nodes.pop())
    else:
        subtree = JunctionTree(subtree_undirected_graph.edges())

    # Selecting the root node: a node having only one neighbor (a leaf).
    # ``nodes()`` / ``neighbors()`` are wrapped in ``list`` so this works
    # whether the graph library returns lists or views/iterators.
    subtree_node_list = list(subtree.nodes())
    if len(subtree_node_list) == 1:
        root_node = subtree_node_list[0]
    else:
        leaves = [
            candidate for candidate in subtree_node_list
            if len(list(subtree.neighbors(candidate))) == 1
        ]
        root_node = leaves[0]

    clique_potential_list = [self.clique_beliefs[root_node]]

    # For the other nodes of the subtree, the potential is the clique
    # belief divided by the belief of the sepset shared with its parent.
    # Nodes are tuples, so ``set(root_node)`` would add the tuple's
    # elements; ``set([root_node])`` keeps the tuple itself as the member.
    parent_nodes = set([root_node])
    nodes_traversed = set()
    while parent_nodes:
        parent_node = parent_nodes.pop()
        for child_node in set(
                subtree.neighbors(parent_node)) - nodes_traversed:
            clique_potential_list.append(
                self.clique_beliefs[child_node] /
                self.sepset_beliefs[frozenset([parent_node, child_node])])
            parent_nodes.update([child_node])
        nodes_traversed.update([parent_node])

    # Add factors to the corresponding junction tree
    subtree.add_factors(*clique_potential_list)

    # Variable elimination on the subtree (sum-product or max-product,
    # depending on ``operation``)
    variable_elimination = VariableElimination(subtree)
    if operation == 'marginalize':
        return variable_elimination.query(variables=variables,
                                          evidence=evidence)
    elif operation == 'maximize':
        return variable_elimination.map_query(variables=variables,
                                              evidence=evidence)