def get_cardinality(self, check_cardinality=False): """ Returns a dictionary with the given factors as keys and their respective cardinality as values. Parameters ---------- check_cardinality: boolean, optional If, check_cardinality=True it checks if cardinality information for all the variables is availble or not. If not it raises an error. Examples -------- >>> from pgm.models import MarkovModel >>> from pgm.factors.discrete import DiscreteFactor >>> student = MarkovModel([('Alice', 'Bob'), ('Bob', 'Charles')]) >>> factor = DiscreteFactor(['Alice', 'Bob'], cardinality=[2, 2], ... values=np.random.rand(4)) >>> student.add_factors(factor) >>> student.get_cardinality() defaultdict(<class 'int'>, {'Bob': 2, 'Alice': 2}) """ cardinalities = defaultdict(int) for factor in self.factors: for variable, cardinality in zip(factor.scope(), factor.cardinality): cardinalities[variable] = cardinality if check_cardinality and len(self.nodes()) != len(cardinalities): raise ValueError('Factors for all the variables not defined') return cardinalities
def get_cardinality(self, check_cardinality=False): """ Returns a dictionary with the given factors as keys and their respective cardinality as values. Parameters ---------- check_cardinality: boolean, optional If, check_cardinality=True it checks if cardinality information for all the variables is availble or not. If not it raises an error. >>> from pgm.models import FactorGraph >>> from pgm.factors import DiscreteFactor >>> G = FactorGraph() >>> G.add_nodes_from(['a', 'b', 'c']) >>> phi1 = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4)) >>> phi2 = DiscreteFactor(['b', 'c'], [2, 2], np.random.rand(4)) >>> G.add_nodes_from([phi1, phi2]) >>> G.add_edges_from([('a', phi1), ('b', phi1), ... ('b', phi2), ('c', phi2)]) >>> G.add_factors(phi1, phi2) >>> G.get_cardinality() defaultdict(<class 'int'>, {'c': 2, 'b': 2, 'a': 2}) """ cardinalities = defaultdict(int) for factor in self.factors: for variable, cardinality in zip(factor.scope(), factor.cardinality): cardinalities[variable] = cardinality if check_cardinality and len( self.get_variable_nodes()) != len(cardinalities): raise ValueError('Factors for all the variables not defined') return cardinalities
def check_model(self): """ Check the model for various errors. This method checks for the following errors - * Checks if the cardinalities of all the variables are consistent across all the factors. * Factors are defined for all the random variables. Returns ------- check: boolean True if all the checks are passed """ cardinalities = self.get_cardinality() for factor in self.factors: for variable, cardinality in zip(factor.scope(), factor.cardinality): if cardinalities[variable] != cardinality: raise ValueError( 'Cardinality of variable {var} not matching among factors' .format(var=variable)) for var1, var2 in itertools.combinations(factor.variables, 2): if var2 not in self.neighbors(var1): raise ValueError( "DiscreteFactor inconsistent with the model.") return True
def __init__(self, variables=None, card=None, start_state=None): """ Parameters: ----------- variables: array-like iterable object A list of variables of the model. card: array-like iterable object A list of cardinalities of the variables. start_state: array-like iterable object List of tuples representing the starting states of the variables. """ if variables is None: variables = [] if card is None: card = [] if not hasattr(variables, '__iter__') or isinstance( variables, six.string_types): raise ValueError('variables must be a non-string iterable.') if not hasattr(card, '__iter__') or isinstance(card, six.string_types): raise ValueError('card must be a non-string iterable.') self.variables = variables self.cardinalities = {v: c for v, c in zip(variables, card)} self.transition_models = {var: {} for var in variables} if start_state is None or self._check_state(start_state): self.state = start_state
def check_model(self): """ Check the model for various errors. This method checks for the following errors. * Checks if factors are defined for all the cliques or not. * Check for running intersection property is not done explicitly over here as it done in the add_edges method. * Check if cardinality of random variable remains same across all the factors. Returns ------- check: boolean True if all the checks are passed """ for clique in self.nodes(): factors = filter(lambda x: set(x.scope()) == set(clique), self.factors) if not any(factors): raise ValueError( 'Factors for all the cliques or clusters not defined.') cardinalities = self.get_cardinality() for factor in self.factors: for variable, cardinality in zip(factor.scope(), factor.cardinality): if (cardinalities[variable] != cardinality): raise ValueError( 'Cardinality of variable {var} not matching among factors' .format(var=variable)) return True
def __repr__(self): var_str = '<TabularCPD representing P({var}:{card}'.format( var=self.variable, card=self.variable_card) evidence = self.variables[1:] evidence_card = self.cardinality[1:] if evidence: evidence_str = ' | ' + ', '.join(['{var}:{card}'.format(var=var, card=card) for var, card in zip(evidence, evidence_card)]) else: evidence_str = '' return var_str + evidence_str + ') at {address}>'.format(address=hex(id(self)))
def check_model(self): """ Check the model for various errors. This method checks for the following errors. In the same time it also updates the cardinalities of all the random variables. * Check whether bipartite property of factor graph is still maintained or not. * Check whether factors are associated for all the random variables or not. * Check if factors are defined for each factor node or not. * Check if cardinality of random variable remains same across all the factors. """ variable_nodes = set( [x for factor in self.factors for x in factor.scope()]) factor_nodes = set(self.nodes()) - variable_nodes if not all( isinstance(factor_node, DiscreteFactor) for factor_node in factor_nodes): raise ValueError( 'Factors not associated for all the random variables') if (not (bipartite.is_bipartite(self)) or not (bipartite.is_bipartite_node_set(self, variable_nodes) or bipartite.is_bipartite_node_set(self, variable_nodes))): raise ValueError('Edges can only be between variables and factors') if len(factor_nodes) != len(self.factors): raise ValueError( 'Factors not associated with all the factor nodes.') cardinalities = self.get_cardinality() for factor in self.factors: for variable, cardinality in zip(factor.scope(), factor.cardinality): if (cardinalities[variable] != cardinality): raise ValueError( 'Cardinality of variable {var} not matching among factors' .format(var=variable)) return True
def add_variables_from(self, variables, cards): """ Add several variables to the model at once. Parameters: ----------- variables: array-like iterable object List of variables to be added. cards: array-like iterable object List of cardinalities of the variables to be added. Examples: --------- >>> from pgm.models import MarkovChain as MC >>> model = MC() >>> model.add_variables_from(['x', 'y'], [3, 4]) """ for var, card in zip(variables, cards): self.add_variable(var, card)
def assignment(self, index): """ Returns a list of assignments for the corresponding index. Parameters ---------- index: list, array-like List of indices whose assignment is to be computed Returns ------- list: Returns a list of full assignments of all the variables of the factor. Examples -------- >>> import numpy as np >>> from pgm.factors.discrete import DiscreteFactor >>> phi = DiscreteFactor(['diff', 'intel'], [2, 2], np.ones(4)) >>> phi.assignment([1, 2]) [[('diff', 0), ('intel', 1)], [('diff', 1), ('intel', 0)]] """ index = np.array(index) max_possible_index = np.prod(self.cardinality) - 1 if not all(i <= max_possible_index for i in index): raise IndexError("Index greater than max possible index") assignments = np.zeros((len(index), len(self.scope())), dtype=np.int) rev_card = self.cardinality[::-1] for i, card in enumerate(rev_card): assignments[:, i] = index % card index = index // card assignments = assignments[:, ::-1] return [[(key, val) for key, val in zip(self.variables, values)] for values in assignments]
def to_junction_tree(self): """ Creates a junction tree (or clique tree) for a given markov model. For a given markov model (H) a junction tree (G) is a graph 1. where each node in G corresponds to a maximal clique in H 2. each sepset in G separates the variables strictly on one side of the edge to other. Examples -------- >>> from pgm.models import MarkovModel >>> from pgm.factors.discrete import DiscreteFactor >>> mm = MarkovModel() >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7']) >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'), ... ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'), ... ('x4', 'x7'), ('x5', 'x7')]) >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()] >>> mm.add_factors(*phi) >>> junction_tree = mm.to_junction_tree() """ from pgm.models import JunctionTree # Check whether the model is valid or not self.check_model() # Triangulate the graph to make it chordal triangulated_graph = self.triangulate() # Find maximal cliques in the chordal graph cliques = list(map(tuple, nx.find_cliques(triangulated_graph))) # If there is only 1 clique, then the junction tree formed is just a # clique tree with that single clique as the node if len(cliques) == 1: clique_trees = JunctionTree() clique_trees.add_node(cliques[0]) # Else if the number of cliques is more than 1 then create a complete # graph with all the cliques as nodes and weight of the edges being # the length of sepset between two cliques elif len(cliques) >= 2: complete_graph = UndirectedGraph() edges = list(itertools.combinations(cliques, 2)) weights = list( map(lambda x: len(set(x[0]).intersection(set(x[1]))), edges)) for edge, weight in zip(edges, weights): complete_graph.add_edge(*edge, weight=-weight) # Create clique trees by minimum (or maximum) spanning tree method clique_trees = JunctionTree( nx.minimum_spanning_tree(complete_graph).edges()) # Check whether the factors are defined for all the random variables or not all_vars = itertools.chain( *[factor.scope() for factor in self.factors]) if set(all_vars) != set(self.nodes()): ValueError( 'DiscreteFactor for all the random variables not specified') # Dictionary stating whether the factor is used to create clique # potential or not # If false, then it is not used to create any clique potential is_used = {factor: False for factor in self.factors} for node in clique_trees.nodes(): clique_factors = [] for factor in self.factors: # If the factor is not used in creating any clique potential as # well as has any variable of the given clique in its scope, # then use it in creating clique potential if not is_used[factor] and set(factor.scope()).issubset(node): clique_factors.append(factor) is_used[factor] = True # To compute clique potential, initially set it as unity factor var_card = [self.get_cardinality()[x] for x in node] clique_potential = DiscreteFactor(node, var_card, np.ones(np.product(var_card))) # multiply it with the factors associated with the variables present # in the clique (or node) clique_potential *= factor_product(*clique_factors) clique_trees.add_factors(clique_potential) if not all(is_used.values()): raise ValueError( 'All the factors were not used to create Junction Tree.' 'Extra factors are defined.') return clique_trees
def __repr__(self): var_card = ", ".join(['{var}:{card}'.format(var=var, card=card) for var, card in zip(self.variables, self.cardinality)]) return "<Joint Distribution representing P({var_card}) at {address}>".format(address=hex(id(self)), var_card=var_card)
def test_add_variables_from(self, add_var): model = MC() model.add_variables_from(self.variables, self.card) calls = [call(model, *p) for p in zip(self.variables, self.card)] add_var.assert_has_calls(calls)
def reorder_parents(self, new_order, inplace=True): """ Returns a new cpd table according to provided order Parameters ---------- new_order: list list of new ordering of variables inplace: boolean If inplace == True it will modify the CPD itself otherwise new value will be returned without affecting old values Examples -------- Consider a CPD P(grade| diff, intel) >>> cpd = TabularCPD('grade',3,[[0.1,0.1,0.1,0.1,0.1,0.1], [0.1,0.1,0.1,0.1,0.1,0.1], [0.8,0.8,0.8,0.8,0.8,0.8]], evidence=['diff', 'intel'], evidence_card=[2,3]) >>> print(cpd) +---------+---------+---------+---------+---------+---------+---------+ | diff | diff_0 | diff_0 | diff_0 | diff_1 | diff_1 | diff_1 | +---------+---------+---------+---------+---------+---------+---------+ | intel | intel_0 | intel_1 | intel_2 | intel_0 | intel_1 | intel_2 | +---------+---------+---------+---------+---------+---------+---------+ | grade_0 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 | +---------+---------+---------+---------+---------+---------+---------+ | grade_1 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 | +---------+---------+---------+---------+---------+---------+---------+ | grade_2 | 0.8 | 0.8 | 0.8 | 0.8 | 0.8 | 0.8 | +---------+---------+---------+---------+---------+---------+---------+ >>> cpd.values array([[[ 0.1, 0.1, 0.1], [ 0.1, 0.1, 0.1]], [[ 0.1, 0.1, 0.1], [ 0.1, 0.1, 0.1]], [[ 0.8, 0.8, 0.8], [ 0.8, 0.8, 0.8]]]) >>> cpd.variables ['grade', 'diff', 'intel'] >>> cpd.cardinality array([3, 2, 3]) >>> cpd.variable 'grade' >>> cpd.variable_card 3 >>> cpd.reorder_parents(['intel', 'diff']) array([[ 0.1, 0.1, 0.2, 0.2, 0.1, 0.1], [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], [ 0.8, 0.8, 0.7, 0.7, 0.8, 0.8]]) >>> print(cpd) +---------+---------+---------+---------+---------+---------+---------+ | intel | intel_0 | intel_0 | intel_1 | intel_1 | intel_2 | intel_2 | +---------+---------+---------+---------+---------+---------+---------+ | diff | diff_0 | diff_1 | diff_0 | diff_1 | diff_0 | diff_1 | +---------+---------+---------+---------+---------+---------+---------+ | grade_0 | 0.1 | 0.1 | 0.2 | 0.2 | 0.1 | 0.1 | +---------+---------+---------+---------+---------+---------+---------+ | grade_1 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 | +---------+---------+---------+---------+---------+---------+---------+ | grade_2 | 0.8 | 0.8 | 0.7 | 0.7 | 0.8 | 0.8 | +---------+---------+---------+---------+---------+---------+---------+ >>> cpd.values array([[[ 0.1, 0.1], [ 0.2, 0.2], [ 0.1, 0.1]], [[ 0.1, 0.1], [ 0.1, 0.1], [ 0.1, 0.1]], [[ 0.8, 0.8], [ 0.7, 0.7], [ 0.8, 0.8]]]) >>> cpd.variables ['grade', 'intel', 'diff'] >>> cpd.cardinality array([3, 3, 2]) >>> cpd.variable 'grade' >>> cpd.variable_card 3 """ if (len(self.variables) <= 1 or (set(new_order) - set(self.variables)) or (set(self.variables[1:]) - set(new_order))): raise ValueError("New order either has missing or extra arguments") else: if new_order != self.variables[1:]: evidence = self.variables[1:] evidence_card = self.cardinality[1:] card_map = dict(zip(evidence, evidence_card)) old_pos_map = dict(zip(evidence, range(len(evidence)))) trans_ord = [0] + [(old_pos_map[letter] + 1) for letter in new_order] new_values = np.transpose(self.values, trans_ord) if inplace: variables = [self.variables[0]] + new_order cardinality = [self.variable_card] + [card_map[var] for var in new_order] super(TabularCPD, self).__init__(variables, cardinality, new_values.flatten('C')) return self.get_cpd() else: return new_values.reshape(self.cardinality[0], np.prod([card_map[var] for var in new_order])) else: warn("Same ordering provided as current") return self.get_cpd()
def __repr__(self): var_card = ", ".join(['{var}:{card}'.format(var=var, card=card) for var, card in zip(self.variables, self.cardinality)]) return "<DiscreteFactor representing phi({var_card}) at {address}>".format(address=hex(id(self)), var_card=var_card)