def prob_from_sample(self, state, sample=None, window_size=None):
    """
    Given an instantiation (partial or complete) of the variables of the model,
    compute the probability of observing it over multiple windows in a given sample.

    If 'sample' is not passed as an argument, generate the statistic by sampling from the
    Markov Chain, starting with a random initial state.

    Parameters:
    -----------
    state: iterable of (variable, state) pairs
        The instantiation whose frequency is measured.
    sample: DataFrame-like or None
        Rows are read with ``sample.loc[row, variable]`` for rows
        ``0 .. len(sample) - 1``. If None, 10000 samples are drawn with
        ``self.sample`` starting from ``self.random_state()``.
    window_size: int or None
        Number of consecutive rows per window. If None, the sample is split
        into 100 windows (``len(sample) // 100`` rows each, but never fewer
        than one row per window).

    Return Type:
    ------------
    numpy array with one entry per window: the fraction of rows in that
    window that match every (variable, state) pair in `state`.

    Examples:
    ---------
    >>> from pgm.models.MarkovChain import MarkovChain as MC
    >>> model = MC(['intel', 'diff'], [3, 2])
    >>> intel_tm = {0: {0: 0.2, 1: 0.4, 2:0.4}, 1: {0: 0, 1: 0.5, 2: 0.5}, 2: {2: 1}}
    >>> model.add_transition_model('intel', intel_tm)
    >>> diff_tm = {0: {0: 0.5, 1: 0.5}, 1: {0: 0.25, 1:0.75}}
    >>> model.add_transition_model('diff', diff_tm)
    >>> model.prob_from_sample([State('diff', 0)])
    array([ 0.27,  0.4 ,  0.18,  0.23, ..., 0.29])
    """
    if sample is None:
        # generate sample of size 10000
        sample = self.sample(self.random_state(), size=10000)
    if window_size is None:
        # Default is 100 windows. Clamp to 1 so that samples shorter than
        # 100 rows do not yield a zero window size (division by zero below).
        window_size = max(1, len(sample) // 100)

    windows = len(sample) // window_size
    probabilities = np.zeros(windows)
    for i in range(windows):
        first_row = i * window_size
        for ind in range(first_row, first_row + window_size):
            # Every pair is compared (no short-circuit) so that an unknown
            # variable name always surfaces as an error.
            if all([sample.loc[ind, v] == s for v, s in state]):
                probabilities[i] += 1
    return probabilities / window_size
def _make_table_str(self, tablefmt="fancy_grid", print_state_names=True):
    """
    Render the CPD as a table string using `tabulate`.

    The first entry of ``self.variables`` / ``self.cardinality`` is skipped
    and the remaining entries are treated as evidence: one header row is
    emitted per evidence variable, followed by one row per state of
    ``self.variable`` holding the values returned by ``self.get_cpd()``.

    Parameters
    ----------
    tablefmt: str
        Table format forwarded to `tabulate`.
    print_state_names: boolean
        If True (and ``self.state_names`` is set), cells are labelled
        ``var(state_name)``; otherwise ``var_index``.
    """
    show_names = self.state_names and print_state_names

    def cell_label(var, state_no):
        # Single place that decides between named and numbered state labels.
        if show_names:
            return '{var}({state})'.format(var=var, state=self.state_names[var][state_no])
        return '{s}_{d}'.format(s=var, d=state_no)

    # build column headers
    header_rows = []
    parents = self.variables[1:]
    parent_cards = self.cardinality[1:]
    if parents:
        # One row per joint assignment of the evidence variables; column i
        # holds the state index of the i-th evidence variable.
        state_grid = np.array(list(product(*[range(card) for card in parent_cards])))
        for position in range(len(parent_cards)):
            parent = parents[position]
            header_rows.append(
                [parent] + [cell_label(parent, d) for d in state_grid.T[position]])

    # Build row headers
    row_labels = [cell_label(self.variable, i) for i in range(self.variable_card)]

    # Stack with data
    body_rows = np.hstack((np.array([row_labels]).T, self.get_cpd())).tolist()

    # No support for multi-headers in tabulate
    return tabulate(header_rows + body_rows, tablefmt=tablefmt)
def test_remove_multiple_factors(self):
    """Removing every factor that was added leaves the graph without factors."""
    self.graph.add_nodes_from(['a', 'b', 'c'])
    added = [
        DiscreteFactor(['a', 'b'], [2, 2], range(4)),
        DiscreteFactor(['b', 'c'], [2, 2], range(4)),
    ]
    self.graph.add_factors(*added)
    self.graph.remove_factors(*added)
    six.assertCountEqual(self, self.graph.factors, [])
def test_get_factors(self):
    """get_factors() is empty before adding factors and lists them afterwards."""
    self.graph.add_nodes_from(['a', 'b', 'c'])
    factor_ab = DiscreteFactor(['a', 'b'], [2, 2], range(4))
    factor_bc = DiscreteFactor(['b', 'c'], [2, 2], range(4))

    # Nothing has been registered yet.
    six.assertCountEqual(self, self.graph.get_factors(), [])

    self.graph.add_factors(factor_ab, factor_bc)
    expected = [factor_ab, factor_bc]
    six.assertCountEqual(self, self.graph.get_factors(), expected)
def _str(self, phi_or_p="phi", tablefmt="fancy_grid", print_state_names=True):
    """
    Generate the string from `__str__` method.

    Parameters
    ----------
    phi_or_p: 'phi' | 'p'
            'phi': When used for Factors.
            'p': When used for CPDs.
    tablefmt: str
            Table format forwarded to `tabulate`.
    print_state_names: boolean
            If True, the user defined state names are displayed.
    """
    header = [six.text_type(name) for name in self.scope()]
    # The last column is titled e.g. "phi(x1,x2)"; the join runs before the
    # append, so it only covers the variable names.
    header.append('{phi_or_p}({variables})'.format(phi_or_p=phi_or_p,
                                                   variables=','.join(header)))

    var_names = list(self.variables)
    flat_values = self.values.ravel()
    rows = []
    assignments = product(*[range(card) for card in self.cardinality])
    for row_no, assignment in enumerate(assignments):
        if self.state_names and print_state_names:
            row = ["{var}({state})".format(
                var=var_names[i],
                state=self.state_names[var_names[i]][assignment[i]])
                for i in range(len(var_names))]
        else:
            row = ["{s}_{d}".format(s=var_names[i], d=assignment[i])
                   for i in range(len(var_names))]
        # Assignments are generated in the same order as the flattened values.
        row.append(flat_values[row_no])
        rows.append(row)

    return tabulate(rows, headers=header, tablefmt=tablefmt, floatfmt=".4f")
def test_partition_function(self):
    """Partition function of two chained 2x2 factors with values 0..3."""
    self.graph.add_nodes_from(['a', 'b', 'c'])
    factor_ab = DiscreteFactor(['a', 'b'], [2, 2], range(4))
    factor_bc = DiscreteFactor(['b', 'c'], [2, 2], range(4))
    self.graph.add_factors(factor_ab, factor_bc)
    self.graph.add_edges_from([('a', 'b'), ('b', 'c')])

    # Sum over a, b, c of factor_ab * factor_bc: (0+2)*(0+1) + (1+3)*(2+3).
    expected = 22.0
    self.assertEqual(self.graph.get_partition_function(), expected)
def setUp(self):
    """Create the junction tree and the Bayesian model used by the tests."""
    # Junction tree (A,B) -- (B,C) -- (C,D) with one factor per clique.
    clique_ab, clique_bc, clique_cd = ('A', 'B'), ('B', 'C'), ('C', 'D')
    self.junction_tree = JunctionTree([(clique_ab, clique_bc),
                                       (clique_bc, clique_cd)])
    self.junction_tree.add_factors(
        DiscreteFactor(['A', 'B'], [2, 3], range(6)),
        DiscreteFactor(['B', 'C'], [3, 2], range(6)),
        DiscreteFactor(['C', 'D'], [2, 2], range(4)))

    # Bayesian network: A, R -> J; J -> Q; J, G -> L.
    self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'),
                                         ('J', 'L'), ('G', 'L')])
    cpd_a = TabularCPD('A', 2, values=[[0.2], [0.8]])
    cpd_g = TabularCPD('G', 2, values=[[0.6], [0.4]])
    cpd_j = TabularCPD('J', 2,
                       values=[[0.9, 0.6, 0.7, 0.1],
                               [0.1, 0.4, 0.3, 0.9]],
                       evidence=['A', 'R'], evidence_card=[2, 2])
    cpd_l = TabularCPD('L', 2,
                       values=[[0.9, 0.45, 0.8, 0.1],
                               [0.1, 0.55, 0.2, 0.9]],
                       evidence=['J', 'G'], evidence_card=[2, 2])
    cpd_q = TabularCPD('Q', 2,
                       values=[[0.9, 0.2],
                               [0.1, 0.8]],
                       evidence=['J'], evidence_card=[2])
    cpd_r = TabularCPD('R', 2, values=[[0.4], [0.6]])
    self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
def test_factorset_divide(self):
    """Dividing factor sets keeps the dividend's factors and adds inverses of the divisor's."""
    phi1 = DiscreteFactor(['x1', 'x2', 'x3'], [2, 3, 2], range(1, 13))
    phi2 = DiscreteFactor(['x3', 'x4', 'x1'], [2, 2, 2], range(1, 9))
    divisor = FactorSet(phi1, phi2)

    phi3 = DiscreteFactor(['x5', 'x6', 'x7'], [2, 2, 2], range(1, 9))
    phi4 = DiscreteFactor(['x5', 'x7', 'x8'], [2, 2, 2], range(1, 9))
    dividend = FactorSet(phi3, phi4)

    quotient = dividend.divide(divisor, inplace=False)

    # Each divisor factor appears as identity / factor in the result.
    expected = {
        phi3,
        phi4,
        phi1.identity_factor() / phi1,
        phi2.identity_factor() / phi2,
    }
    self.assertEqual(expected, quotient.factors)
def sample(self, start_state=None, size=1):
    """
    Sample from the Markov Chain.

    Each variable in turn is re-drawn from
    ``self.transition_models[var][states_of_all_other_variables]``; after
    every full sweep the current state is stored as the next row.

    Parameters:
    -----------
    start_state: dict or array-like iterable
        Representing the starting states of the variables. If None is passed,
        the current ``self.state`` is reused, or a random one is chosen when
        no state has been set yet.
    size: int
        Number of samples to be generated.

    Return Type:
    ------------
    pandas.DataFrame

    Examples:
    ---------
    >>> from pgm.factors import DiscreteFactor
    >>> from pgm.inference import GibbsSampling
    >>> from pgm.models import MarkovModel
    >>> model = MarkovModel([('A', 'B'), ('C', 'B')])
    >>> factor_ab = DiscreteFactor(['A', 'B'], [2, 2], [1, 2, 3, 4])
    >>> factor_cb = DiscreteFactor(['C', 'B'], [2, 2], [5, 6, 7, 8])
    >>> model.add_factors(factor_ab, factor_cb)
    >>> gibbs = GibbsSampling(model)
    >>> gibbs.sample(size=4)
       A  B  C
    0  0  1  1
    1  1  0  0
    2  1  1  0
    3  1  1  1
    """
    if start_state is not None:
        self.set_start_state(start_state)
    elif self.state is None:
        self.state = self.random_state()

    sampled = DataFrame(index=range(size), columns=self.variables)
    # Row 0 is the starting state itself.
    sampled.loc[0] = [value for _, value in self.state]

    for row in range(1, size):
        for position, (var, _) in enumerate(self.state):
            # The kernel for `var` is keyed by the states of all other variables.
            others = tuple(value for name, value in self.state if var != name)
            choices = list(range(self.cardinalities[var]))
            drawn = sample_discrete(choices, self.transition_models[var][others])[0]
            # Update in place so later variables see the new value.
            self.state[position] = State(var, drawn)
        sampled.loc[row] = [value for _, value in self.state]
    return sampled
def get_independencies(self, latex=False):
    """
    Computes independencies in the Bayesian Network, by checking d-separation.

    For every node and every proper subset of the remaining nodes taken as
    observed, the nodes not reachable by an active trail are asserted
    independent of the start node given the observed set.

    Parameters
    ----------
    latex: boolean
        If latex=True then latex string of the independence assertion
        would be created.

    Examples
    --------
    >>> from pgm.models import BayesianModel
    >>> chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
    >>> chain.get_independencies()
    (X _|_ Z | Y)
    (Z _|_ X | Y)
    """
    result = Independencies()
    for start in self.nodes():
        others = set(self.nodes()) - {start}
        # Observed sets of size 0 .. len(others) - 1.
        for subset_size in range(len(others)):
            for observed in itertools.combinations(others, subset_size):
                reachable = set(self.active_trail_nodes(start, observed=observed))
                separated = others - set(observed) - reachable
                if separated:
                    result.add_assertions([start, separated, observed])

    result.reduce()

    if latex:
        return result.latex_string()
    return result
def __eq__(self, other):
    """
    Compare two factors irrespective of the order of their variables.

    `other` is copied and its axes are permuted to follow the variable order
    of `self`; the factors are equal when shapes match, values are close
    (`np.allclose`) and cardinalities agree.
    """
    if not isinstance(self, DiscreteFactor) or not isinstance(other, DiscreteFactor):
        return False
    if set(self.scope()) != set(other.scope()):
        return False

    aligned = other.copy()
    for axis in range(self.values.ndim):
        # Bring the variable that `self` has on this axis to the same axis
        # of the copy, keeping variables, cardinality and values in sync.
        source = aligned.variables.index(self.variables[axis])
        aligned.variables[axis], aligned.variables[source] = (
            aligned.variables[source], aligned.variables[axis])
        aligned.cardinality[axis], aligned.cardinality[source] = (
            aligned.cardinality[source], aligned.cardinality[axis])
        aligned.values = aligned.values.swapaxes(axis, source)

    if aligned.values.shape != self.values.shape:
        return False
    if not np.allclose(aligned.values, self.values):
        return False
    return all(self.cardinality == aligned.cardinality)
def get_edges(self):
    """
    Returns the edges of the network.

    For a 'BAYES' network the last variable of each function scope is taken
    as the child and one ``(child, other_variable)`` tuple is emitted for
    every other variable in the scope. For a 'MARKOV' network every pair of
    variables in a function scope is emitted. Any other network type
    contributes no edges.

    Returns
    -------
    set: set containing the edges of the network

    Example
    -------
    >>> reader = UAIReader('TestUAI.uai')
    >>> reader.get_edges()
    {('var_0', 'var_1'), ('var_0', 'var_2'), ('var_1', 'var_2')}
    """
    edges = set()
    if self.no_functions <= 0:
        return edges

    # The parse result does not depend on the function index, so parse the
    # network once instead of once per function.
    parsed = self.grammar.parseString(self.network)

    for function in range(self.no_functions):
        function_variables = parsed['fun_' + str(function)]
        # A scope holding a single variable may come back as a bare int.
        if isinstance(function_variables, int):
            function_variables = [function_variables]

        if self.network_type == 'BAYES':
            child_var = "var_" + str(function_variables[-1])
            edges.update((child_var, "var_" + str(var))
                         for var in function_variables[:-1])
        elif self.network_type == "MARKOV":
            names = ["var_" + str(var) for var in function_variables]
            edges.update(combinations(names, 2))
    return edges
def get_partition_function(self):
    r"""
    Returns the partition function for a given undirected graph.

    A partition function is defined as

    .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

    where m is the number of factors present in the graph
    and X are all the random variables present.

    Raises
    ------
    ValueError
        If the scope of the product of all factors differs from the set of
        variable nodes of the graph.

    Examples
    --------
    >>> from pgm.models import FactorGraph
    >>> from pgm.factors.discrete import DiscreteFactor
    >>> G = FactorGraph()
    >>> G.add_nodes_from(['a', 'b', 'c'])
    >>> phi1 = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4))
    >>> phi2 = DiscreteFactor(['b', 'c'], [2, 2], np.random.rand(4))
    >>> G.add_factors(phi1, phi2)
    >>> G.add_nodes_from([phi1, phi2])
    >>> G.add_edges_from([('a', phi1), ('b', phi1),
    ...                   ('b', phi2), ('c', phi2)])
    >>> G.get_factors()
    >>> G.get_partition_function()
    """
    # The docstring is a raw string: "\s" and "\p" in the math markup are
    # invalid escape sequences in a normal string literal.
    factor = self.factors[0]
    factor = factor_product(factor, *self.factors[1:])
    if set(factor.scope()) != set(self.get_variable_nodes()):
        raise ValueError(
            'DiscreteFactor for all the random variables not defined.')
    return np.sum(factor.values)
def _get_kernel_from_bayesian_model(self, model):
    """
    Computes the Gibbs transition models from a Bayesian Network.
    'Probabilistic Graphical Model Principles and Techniques', Koller and
    Friedman, Section 12.3.3 pp 512-513.

    For every variable, the CPDs whose scope contains it are multiplied; for
    each joint assignment of all other variables the product is reduced to
    that assignment and normalised, and the result is stored in
    ``self.transition_models[var]`` keyed by the assignment tuple.

    Parameters:
    -----------
    model: BayesianModel
        The model from which probabilities will be computed.
    """
    self.variables = np.array(model.nodes())
    self.cardinalities = {
        name: model.get_cpds(name).variable_card for name in self.variables
    }

    for var in self.variables:
        others = [name for name in self.variables if var != name]
        other_state_ranges = [range(self.cardinalities[name]) for name in others]

        relevant_cpds = [cpd for cpd in model.cpds if var in cpd.scope()]
        joint = factor_product(*relevant_cpds)
        joint_scope = set(joint.scope())

        kernel = {}
        for assignment in itertools.product(*other_state_ranges):
            # Only variables that appear in the product can be reduced on.
            evidence = [State(name, value)
                        for name, value in zip(others, assignment)
                        if name in joint_scope]
            conditioned = joint.reduce(evidence, inplace=False)
            kernel[assignment] = conditioned.values / sum(conditioned.values)
        self.transition_models[var] = kernel
def get_partition_function(self):
    r"""
    Returns the partition function for a given undirected graph.

    A partition function is defined as

    .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

    where m is the number of factors present in the graph
    and X are all the random variables present.

    The value is only computed when ``self.check_model()`` returns a truthy
    value; otherwise None is returned.

    Examples
    --------
    >>> from pgm.models import ClusterGraph
    >>> from pgm.factors.discrete import DiscreteFactor
    >>> G = ClusterGraph()
    >>> G.add_nodes_from([('a', 'b', 'c'), ('a', 'b'), ('a', 'c')])
    >>> G.add_edges_from([(('a', 'b', 'c'), ('a', 'b')),
    ...                   (('a', 'b', 'c'), ('a', 'c'))])
    >>> phi1 = DiscreteFactor(['a', 'b', 'c'], [2, 2, 2], np.random.rand(8))
    >>> phi2 = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4))
    >>> phi3 = DiscreteFactor(['a', 'c'], [2, 2], np.random.rand(4))
    >>> G.add_factors(phi1, phi2, phi3)
    >>> G.get_partition_function()
    """
    if not self.check_model():
        return None

    first = self.factors[0]
    remaining = [self.factors[i] for i in range(1, len(self.factors))]
    joint = factor_product(first, *remaining)
    return np.sum(joint.values)
def get_partition_function(self):
    r"""
    Returns the partition function for a given undirected graph.

    A partition function is defined as

    .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

    where m is the number of factors present in the graph
    and X are all the random variables present.

    ``self.check_model()`` is called first; its return value is not used here.

    Raises
    ------
    ValueError
        If the scope of the product of all factors differs from the set of
        nodes of the graph.

    Examples
    --------
    >>> from pgm.models import MarkovModel
    >>> from pgm.factors.discrete import DiscreteFactor
    >>> G = MarkovModel()
    >>> G.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
    >>> G.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
    ...                   ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
    ...                   ('x4', 'x7'), ('x5', 'x7')])
    >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in G.edges()]
    >>> G.add_factors(*phi)
    >>> G.get_partition_function()
    """
    # The docstring is a raw string: "\s" and "\p" in the math markup are
    # invalid escape sequences in a normal string literal.
    self.check_model()

    factor = self.factors[0]
    factor = factor_product(factor, *self.factors[1:])
    if set(factor.scope()) != set(self.nodes()):
        raise ValueError(
            'DiscreteFactor for all the random variables not defined.')
    return np.sum(factor.values)
def set_distributions(self):
    """
    Set distributions in the network.

    Adds a DISTRIBUTIONS element under ``self.bnmodel`` holding one DIST
    element per CPD of ``self.model`` (sorted by variable name). CPDs with
    evidence get a CONDSET listing the evidence variables and one DPI per
    consecutive pair of flattened values; CPDs without evidence get a single
    DPI with all values.

    Note: the list returned by ``self.model.get_cpds()`` is sorted in place.

    Examples
    --------
    >>> from pgm.readwrite.XMLBeliefNetwork import XBNWriter
    >>> writer =XBNWriter()
    >>> writer.set_distributions()
    """
    distributions = etree.SubElement(self.bnmodel, 'DISTRIBUTIONS')

    cpds = self.model.get_cpds()
    cpds.sort(key=lambda x: x.variable)

    for cpd in cpds:
        flat_values = cpd.values.ravel()
        var = cpd.variable
        node_type = self.model.node[var]['TYPE']

        dist = etree.SubElement(distributions, 'DIST', attrib={'TYPE': node_type})
        etree.SubElement(dist, 'PRIVATE', attrib={'NAME': var})
        dpis = etree.SubElement(dist, 'DPIS')

        # Everything after the first variable, in reverse order.
        parents = cpd.variables[:0:-1]
        if not parents:
            etree.SubElement(dpis, "DPI").text = ' ' + ' '.join(map(str, flat_values))
            continue

        condset = etree.SubElement(dist, 'CONDSET')
        for parent_name in sorted(parents):
            etree.SubElement(condset, 'CONDELEM', attrib={'NAME': parent_name})

        # TODO: Get Index value.
        # Values are written two at a time.
        for first in range(0, len(flat_values), 2):
            # Build the text before creating the element so a failure here
            # does not leave an empty DPI behind.
            text = " " + str(flat_values[first]) + " " + str(flat_values[first + 1]) + " "
            etree.SubElement(dpis, "DPI", attrib={'INDEXES': ' '}).text = text
def get_states(self):
    """
    Build the state names for every variable of the model.

    Each variable that owns a CPD gets the states ``<variable>_0`` ..
    ``<variable>_<cardinality - 1>``.

    Returns
    -------
    dict: dict of type {variable: a list of states}

    Example
    -------
    >>> from pgm.readwrite import BIFReader, BIFWriter
    >>> model = BIFReader('dog-problem.bif').get_model()
    >>> writer = BIFWriter(model)
    >>> writer.get_states()
    {'bowel-problem': ['bowel-problem_0', 'bowel-problem_1'],
     'dog-out': ['dog-out_0', 'dog-out_1'],
     'family-out': ['family-out_0', 'family-out_1'],
     'hear-bark': ['hear-bark_0', 'hear-bark_1'],
     'light-on': ['light-on_0', 'light-on_1']}
    """
    return {
        cpd.variable: [
            str(cpd.variable) + '_' + str(state)
            for state in range(cpd.get_cardinality([cpd.variable])[cpd.variable])
        ]
        for cpd in self.model.get_cpds()
    }
def forward_sample(self, size=1):
    """
    Generates sample(s) from joint distribution of the bayesian network.

    Nodes are visited in ``self.topological_order``; a node with evidence is
    sampled row by row from the CPD reduced to the already sampled values of
    its evidence variables.

    Parameters
    ----------
    size: int
        size of sample to be generated

    Returns
    -------
    sampled: pandas.DataFrame
        the generated samples

    Examples
    --------
    >>> from pgm.models.BayesianModel import BayesianModel
    >>> from pgm.factors.discrete import TabularCPD
    >>> from pgm.sampling import BayesianModelSampling
    >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
    >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
    >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
    >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
    ...                0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
    ...                ['intel', 'diff'], [2, 2])
    >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
    >>> inference = BayesianModelSampling(student)
    >>> inference.forward_sample(2)
            diff       intel       grade
    0        1           0          1
    1        1           0          2
    """
    sampled = DataFrame(index=range(size), columns=self.topological_order)
    for node in self.topological_order:
        cpd = self.model.get_cpds(node)
        states = range(self.cardinality[node])
        # Evidence variables: everything after the first CPD variable, reversed.
        evidence = cpd.variables[:0:-1]
        if evidence:
            cached_values = self.pre_compute_reduce(variable=node)
            # Label-based column selection; `.ix` was removed in pandas 1.0.
            evidence_values = sampled.loc[:, evidence].values
            weights = [cached_values[tuple(row)] for row in evidence_values]
        else:
            weights = cpd.values
        sampled[node] = sample_discrete(states, weights, size)
    return sampled
def _find_common_cliques(cliques_list):
    """
    Finds the common cliques among the given set of cliques for
    corresponding node.

    Parameters
    ----------
    cliques_list: sequence of iterables of cliques
        One collection of cliques per node; each clique is converted to a
        tuple before comparison.

    Returns
    -------
    list: the cliques (as tuples) present in every collection, in no
        particular order. An empty `cliques_list` yields an empty list.
    """
    if len(cliques_list) == 0:
        # Nothing to intersect.
        return []
    clique_sets = ({tuple(clique) for clique in cliques}
                   for cliques in cliques_list)
    return list(set.intersection(*clique_sets))
def to_bayesian_model(self):
    """
    Creates a Bayesian Model which is a minimum I-Map for this markov model.

    The ordering of parents may not remain constant. It would depend on the
    ordering of variable in the junction tree (which is not constant) all the
    time.

    Returns
    -------
    BayesianModel: a model containing only the edges; no CPDs are attached
        (conversion of factors into CPDs is not implemented).

    Examples
    --------
    >>> from pgm.models import MarkovModel
    >>> from pgm.factors.discrete import DiscreteFactor
    >>> mm = MarkovModel()
    >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
    >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
    ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
    ...                    ('x4', 'x7'), ('x5', 'x7')])
    >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
    >>> mm.add_factors(*phi)
    >>> bm = mm.to_bayesian_model()
    """
    from pgm.models import BayesianModel

    bm = BayesianModel()
    var_clique_dict = defaultdict(tuple)
    var_order = []

    # Create a junction tree from the markov model.
    # Creation of clique tree involves triangulation, finding maximal cliques
    # and creating a tree from these cliques
    junction_tree = self.to_junction_tree()

    # create an ordering of the nodes based on the ordering of the clique
    # in which it appeared first.
    # `nodes()` may return a view that cannot be indexed by position
    # (networkx >= 2), so materialise it before taking the first clique.
    root_node = list(junction_tree.nodes())[0]
    bfs_edges = nx.bfs_edges(junction_tree, root_node)
    for node in root_node:
        var_clique_dict[node] = root_node
        var_order.append(node)
    for edge in bfs_edges:
        clique_node = edge[1]
        for node in clique_node:
            # An empty tuple (the default) means the variable is not yet placed.
            if not var_clique_dict[node]:
                var_clique_dict[node] = clique_node
                var_order.append(node)

    # create a bayesian model by adding edges from parent of node to node as
    # par(x_i) = (var(c_k) - x_i) \cap {x_1, ..., x_{i-1}}
    for node_index, node in enumerate(var_order):
        earlier = set(var_order[:node_index])
        node_parents = (set(var_clique_dict[node]) - {node}).intersection(earlier)
        bm.add_edges_from([(parent, node) for parent in node_parents])
        # TODO : Convert factor into CPDs
    return bm
def reduce(self, values, inplace=True):
    """
    Reduces the factor to the context of given variable values.

    Parameters
    ----------
    values: list, array-like
        A list of tuples of the form (variable_name, variable_state).

    inplace: boolean
        If inplace=True it will modify the factor itself, else would return
        a new factor.

    Returns
    -------
    DiscreteFactor or None: if inplace=True (default) returns None
                    if inplace=False returns a new `DiscreteFactor` instance.

    Raises
    ------
    TypeError
        If `values` is a string, contains a string element, or contains a
        state that is not an integer.

    Examples
    --------
    >>> from pgm.factors.discrete import DiscreteFactor
    >>> phi = DiscreteFactor(['x1', 'x2', 'x3'], [2, 3, 2], range(12))
    >>> phi.reduce([('x1', 0), ('x2', 0)])
    >>> phi.variables
    ['x3']
    >>> phi.cardinality
    array([2])
    >>> phi.values
    array([0., 1.])
    """
    if isinstance(values, six.string_types):
        raise TypeError("values: Expected type list or array-like, got type str")

    contains_str = any(isinstance(value, six.string_types) for value in values)
    if contains_str or not all(isinstance(state, (int, np.integer))
                               for var, state in values):
        raise TypeError("values: must contain tuples or array-like elements of the form "
                        "(hashable object, type int)")

    phi = self if inplace else self.copy()

    # Start from "take everything" on every axis, then pin the reduced axes
    # to the requested state.
    selector = [slice(None)] * len(self.variables)
    dropped_axes = []
    for var, state in values:
        axis = phi.variables.index(var)
        selector[axis] = state
        dropped_axes.append(axis)

    # Remaining axes in their original order.
    dropped_lookup = set(dropped_axes)
    kept_axes = [axis for axis in range(len(phi.variables))
                 if axis not in dropped_lookup]

    phi.variables = [phi.variables[axis] for axis in kept_axes]
    phi.cardinality = phi.cardinality[kept_axes]
    phi.values = phi.values[tuple(selector)]

    if not inplace:
        return phi
def _tighten_triplet(self, max_iterations, later_iter, max_triplets, prolong):
    """
    This method finds all the triplets that are eligible and adds them iteratively in the bunch of max_triplets

    Parameters
    ----------
    max_iterations: integer
                    Maximum number of times we tighten the relaxation

    later_iter: integer
                Number of maximum iterations that we want MPLP to run. This is lesser than the initial number
                of iterations.

    max_triplets: integer
                  Maximum number of triplets that can be added atmost in one iteration.

    prolong: bool
            It sets the continuation of tightening after all the triplets are exhausted
    """
    # Find all the triplets that are possible in the present model
    triangles = self.find_triangles()
    # Evaluate scores for each of the triplets found above
    scores = self._get_triplet_scores(triangles)
    # Keys ordered by increasing score, so the best candidates sit at the
    # end of the list and can be popped off.
    pending = sorted(scores, key=scores.get)

    for _ in range(max_iterations):
        if self._is_converged(integrality_gap_threshold=self.integrality_gap_threshold):
            break

        # Take up to max_triplets of the highest scoring triplets not yet added.
        batch = []
        while pending and len(batch) < max_triplets:
            batch.append(pending.pop())

        # Stop once the triplets are exhausted, unless asked to prolong.
        if not batch and prolong is False:
            break

        # Update the eligible triplets to tighten the relaxation
        self._update_triangles(batch)
        # Run MPLP for a maximum of later_iter times.
        self._run_mplp(later_iter)
def maximize(self, variables, inplace=True):
    """
    Maximizes the factor with respect to `variables`.

    Parameters
    ----------
    variables: list, array-like
        List of variables with respect to which factor is to be maximized

    inplace: boolean
        If inplace=True it will modify the factor itself, else would return
        a new factor.

    Returns
    -------
    DiscreteFactor or None: if inplace=True (default) returns None
                    if inplace=False returns a new `DiscreteFactor` instance.

    Raises
    ------
    TypeError
        If `variables` is a string.
    ValueError
        If any of `variables` is not in the scope of the factor.

    Examples
    --------
    >>> from pgm.factors.discrete import DiscreteFactor
    >>> phi = DiscreteFactor(['x1', 'x2', 'x3'], [3, 2, 2], [0.25, 0.35, 0.08, 0.16, 0.05, 0.07,
    ...                                              0.00, 0.00, 0.15, 0.21, 0.09, 0.18])
    >>> phi.variables
    ['x1','x2','x3']
    >>> phi.maximize(['x2'])
    >>> phi.variables
    ['x1', 'x3']
    >>> phi.cardinality
    array([3, 2])
    >>> phi.values
    array([[ 0.25,  0.35],
           [ 0.05,  0.07],
           [ 0.15,  0.21]])
    """
    if isinstance(variables, six.string_types):
        raise TypeError("variables: Expected type list or array-like, got type str")

    phi = self if inplace else self.copy()

    # Validate everything up front so nothing is modified on failure.
    for var in variables:
        if var not in phi.variables:
            raise ValueError("{var} not in scope.".format(var=var))

    maxed_axes = [phi.variables.index(var) for var in variables]
    maxed_lookup = set(maxed_axes)
    kept_axes = [axis for axis in range(len(self.variables))
                 if axis not in maxed_lookup]

    phi.variables = [phi.variables[axis] for axis in kept_axes]
    phi.cardinality = phi.cardinality[kept_axes]
    phi.values = np.max(phi.values, axis=tuple(maxed_axes))

    if not inplace:
        return phi
def get_grammar(self):
    """
    Returns the grammar of the UAI file.

    The grammar is built incrementally: counts read from ``self.network``
    with the grammar built so far (number of variables, number of functions,
    size of each function scope, number of values per function) decide how
    many tokens the next part of the grammar must match. As a side effect
    ``self.no_variables`` and ``self.no_functions`` are set.
    """
    def parsed_field(field):
        # Re-parse the network with the grammar as it stands right now.
        return grammar.parseString(self.network)[field]

    # Header: network type followed by the number of variables.
    grammar = (Word(alphas).setResultsName('network_name') +
               Word(nums).setResultsName('no_variables'))
    self.no_variables = int(parsed_field('no_variables'))

    # One domain size per variable, then the number of functions.
    grammar += (Word(nums) * self.no_variables).setResultsName('domain_variables')
    grammar += Word(nums).setResultsName('no_functions')
    self.no_functions = int(parsed_field('no_functions'))

    # Function scopes: a size followed by that many variable indices.
    integer = Word(nums).setParseAction(lambda t: int(t[0]))
    for function in range(0, self.no_functions):
        scope_key = 'fun_scope_' + str(function)
        grammar += Word(nums).setResultsName(scope_key)
        scope_size = int(parsed_field(scope_key))
        grammar += (integer * scope_size).setResultsName('fun_' + str(function))

    # Function tables: a count followed by that many decimal numbers.
    floatnumber = Combine(
        Word(nums) + Optional(Literal(".") + Optional(Word(nums))))
    for function in range(0, self.no_functions):
        count_key = 'fun_no_values_' + str(function)
        grammar += Word(nums).setResultsName(count_key)
        value_count = int(parsed_field(count_key))
        grammar += (floatnumber * value_count).setResultsName('fun_values_' + str(function))

    return grammar
def __hash__(self):
    """
    Hash the factor independently of the order of its variables.

    A copy of the factor has its axes permuted so that the variables are
    ordered by their hash values; the hash is then taken over the string
    forms of the sorted variable hashes, the permuted values and the
    permuted cardinality.
    """
    current = [hash(variable) for variable in self.variables]
    target = sorted(current)

    phi = self.copy()
    for axis in range(phi.values.ndim):
        # Locate the variable that belongs on this axis and swap it in,
        # keeping hashes, cardinality and values in step.
        source = current.index(target[axis])
        current[axis], current[source] = current[source], current[axis]
        phi.cardinality[axis], phi.cardinality[source] = (
            phi.cardinality[source], phi.cardinality[axis])
        phi.values = phi.values.swapaxes(axis, source)

    key = ''.join((str(target), str(phi.values), str(phi.cardinality)))
    return hash(key)
def test_get_model(self):
    """The model built by the reader has the expected CPDs, nodes and edges."""
    model = self.reader.get_model()

    # CPD tables, in the order returned by model.get_cpds().
    cpds_expected = [
        np.array([[0.01], [0.99]]),
        np.array([[0.99, 0.97, 0.9, 0.3], [0.01, 0.03, 0.1, 0.7]]),
        np.array([[0.15], [0.85]]),
        np.array([[0.7, 0.01], [0.3, 0.99]]),
        np.array([[0.6, 0.05], [0.4, 0.95]]),
    ]
    for position, expected_table in enumerate(cpds_expected):
        np_test.assert_array_equal(model.get_cpds()[position].get_cpd(),
                                   expected_table)

    # Node attributes.
    node_expected = {
        'bowel-problem': {'position': '(335, 99)'},
        'dog-out': {'position': '(300, 195)'},
        'family-out': {'position': '(257, 99)'},
        'hear-bark': {'position': '(296, 268)'},
        'light-on': {'position': '(218, 195)'},
    }
    self.assertDictEqual(model.node, node_expected)

    # Adjacency structure.
    edge_expected = {
        'bowel-problem': {'dog-out': {}},
        'dog-out': {'hear-bark': {}},
        'family-out': {'dog-out': {}, 'light-on': {}},
        'hear-bark': {},
        'light-on': {},
    }
    self.assertDictEqual(model.edge, edge_expected)

    # Node and edge lists, compared irrespective of order.
    nodes_expected = ['bowel-problem', 'hear-bark', 'light-on',
                      'dog-out', 'family-out']
    self.assertListEqual(sorted(model.nodes()), sorted(nodes_expected))

    edges_expected = [('family-out', 'dog-out'),
                      ('bowel-problem', 'dog-out'),
                      ('family-out', 'light-on'),
                      ('dog-out', 'hear-bark')]
    self.assertListEqual(sorted(model.edges()), sorted(edges_expected))
def pre_compute_reduce(self, variable):
    """
    Pre-compute the CPD of `variable` reduced to every evidence assignment.

    Parameters
    ----------
    variable: the node whose CPD is fetched from ``self.model``.

    Returns
    -------
    dict: maps each tuple of evidence states (evidence taken as the CPD's
        variables after the first one, in reverse order) to the ``values``
        of the CPD reduced to that assignment.
    """
    cpd = self.model.get_cpds(variable)
    evidence_vars = cpd.variables[:0:-1]
    state_ranges = [range(self.cardinality[var]) for var in evidence_vars]

    return {
        combination: cpd.reduce(list(zip(evidence_vars, combination)),
                                inplace=False).values
        for combination in itertools.product(*state_ranges)
    }
def test_max_calibrate_sepset_belief(self):
    """Sepset beliefs after max-calibration match hand-computed max-marginals."""
    propagation = BeliefPropagation(self.junction_tree)
    propagation.max_calibrate()
    beliefs = propagation.get_sepset_beliefs()

    phi1 = DiscreteFactor(['A', 'B'], [2, 3], range(6))
    phi2 = DiscreteFactor(['B', 'C'], [3, 2], range(6))
    phi3 = DiscreteFactor(['C', 'D'], [2, 2], range(4))

    # Belief over B: maximize D out of phi3, combine with phi2 and maximize
    # C, then combine with phi1 and maximize A.
    over_c = phi3.maximize(['D'], inplace=False)
    over_b = (over_c * phi2).maximize(['C'], inplace=False)
    expected_b = (phi1 * over_b).maximize(['A'], inplace=False)

    # Belief over C: combine phi2 with the A-maximized phi1 and the
    # D-maximized phi3, then maximize B.
    neighbours = (phi1.maximize(['A'], inplace=False) *
                  phi3.maximize(['D'], inplace=False))
    expected_c = (phi2 * neighbours).maximize(['B'], inplace=False)

    sepset_ab_bc = frozenset((('A', 'B'), ('B', 'C')))
    sepset_bc_cd = frozenset((('B', 'C'), ('C', 'D')))
    np_test.assert_array_almost_equal(beliefs[sepset_ab_bc].values,
                                      expected_b.values)
    np_test.assert_array_almost_equal(beliefs[sepset_bc_cd].values,
                                      expected_c.values)
def setUp(self):
    """Create the variables, transition models, start state and sample used by the tests."""
    self.variables = ['intel', 'diff', 'grade']
    self.card = [3, 2, 3]
    self.cardinalities = {'intel': 3, 'diff': 2, 'grade': 3}

    # Transition models: {current_state: {next_state: probability}}.
    self.intel_tm = {
        0: {0: 0.1, 1: 0.25, 2: 0.65},
        1: {0: 0.5, 1: 0.3, 2: 0.2},
        2: {0: 0.3, 1: 0.3, 2: 0.4},
    }
    self.diff_tm = {
        0: {0: 0.3, 1: 0.7},
        1: {0: 0.75, 1: 0.25},
    }
    self.grade_tm = {
        0: {0: 0.4, 1: 0.2, 2: 0.4},
        1: {0: 0.9, 1: 0.05, 2: 0.05},
        2: {0: 0.1, 1: 0.4, 2: 0.5},
    }

    self.start_state = [
        State('intel', 0),
        State('diff', 1),
        State('grade', 2),
    ]
    self.model = MC()

    # 200-row sample: 'a' is 1 then 0, 'b' is 0 then 1 (100 rows each).
    ones, zeros = [1] * 100, [0] * 100
    self.sample = DataFrame(index=range(200), columns=['a', 'b'])
    self.sample.a = ones + zeros
    self.sample.b = zeros + ones