Пример #1
0
    def prob_from_sample(self, state, sample=None, window_size=None):
        """
        Given an instantiation (partial or complete) of the variables of the model,
        compute the probability of observing it over multiple windows in a given sample.

        If 'sample' is not passed as an argument, generate the statistic by sampling from the
        Markov Chain, starting with a random initial state.

        Parameters:
        -----------
        state: iterable of (variable, state) pairs
            The assignment whose relative frequency is measured. An empty
            assignment matches every row.
        sample: pandas.DataFrame, optional
            One row per draw and one column per variable. Rows are consumed in
            their stored order. When omitted, 10000 draws are generated with
            `self.sample(self.random_state(), size=10000)`.
        window_size: int, optional
            Number of consecutive rows per window. When omitted the sample is
            split into 100 windows (`len(sample) // 100` rows each, but never
            fewer than one row per window).

        Return Type:
        ------------
        numpy.ndarray with one entry per complete window: the fraction of rows
        in that window matching `state`. Trailing rows that do not fill a
        whole window are ignored.

        Examples:
        ---------
        >>> from pgm.models.MarkovChain import MarkovChain as MC
        >>> model = MC(['intel', 'diff'], [3, 2])
        >>> intel_tm = {0: {0: 0.2, 1: 0.4, 2:0.4}, 1: {0: 0, 1: 0.5, 2: 0.5}, 2: {2: 1}}
        >>> model.add_transition_model('intel', intel_tm)
        >>> diff_tm = {0: {0: 0.5, 1: 0.5}, 1: {0: 0.25, 1:0.75}}
        >>> model.add_transition_model('diff', diff_tm)
        >>> model.prob_from_sample([State('diff', 0)])
        array([ 0.27,  0.4 ,  0.18,  0.23, ..., 0.29])
        """
        if sample is None:
            # generate sample of size 10000
            sample = self.sample(self.random_state(), size=10000)
        if window_size is None:
            # Default to 100 windows; clamp to 1 so that samples shorter than
            # 100 rows do not produce a zero window size (division by zero).
            window_size = max(1, len(sample) // 100)
        windows = len(sample) // window_size
        used_rows = windows * window_size

        # Row-wise mask: True where every (variable, state) pair matches.
        matches = np.ones(used_rows, dtype=bool)
        for var, value in state:
            matches &= np.asarray(sample[var])[:used_rows] == value

        # One row of the reshaped mask per window; its mean is the frequency.
        per_window = matches.reshape(windows, window_size).sum(axis=1)
        return per_window / float(window_size)
Пример #2
0
    def _make_table_str(self, tablefmt="fancy_grid", print_state_names=True):
        """
        Render the CPD as a table string via `tabulate`.

        One header row is emitted per evidence variable (the entries of
        `self.variables` after the first one), followed by one row per state
        of `self.variable` carrying the values returned by `self.get_cpd()`.

        Parameters
        ----------
        tablefmt: str
            Table format name forwarded to `tabulate`.
        print_state_names: boolean
            If True and `self.state_names` is set, cells are labelled
            `var(state_name)`; otherwise they are labelled `var_index`.

        Returns
        -------
        The value returned by `tabulate` for the assembled rows.
        """
        use_names = self.state_names and print_state_names

        def label(var, index):
            # Single place deciding between named and numbered state labels.
            if use_names:
                return '{var}({state})'.format(var=var, state=self.state_names[var][index])
            return '{s}_{d}'.format(s=var, d=index)

        parents = self.variables[1:]
        parent_cards = self.cardinality[1:]

        # Column headers: one row per evidence variable, one cell per
        # combination of evidence states.
        header_rows = []
        if parents:
            combos = np.array(list(product(*[range(card) for card in parent_cards])))
            for position in range(len(parent_cards)):
                parent = parents[position]
                cells = [label(parent, state) for state in combos.T[position]]
                header_rows.append([parent] + cells)

        # Row headers: one label per state of the CPD's own variable.
        row_labels = [[label(self.variable, state) for state in range(self.variable_card)]]

        # Prepend the label column to the CPD values.
        body_rows = np.hstack((np.array(row_labels).T, self.get_cpd())).tolist()

        # tabulate has no multi-row header support, so headers go in as ordinary rows.
        return tabulate(header_rows + body_rows, tablefmt=tablefmt)
Пример #3
0
 def test_remove_multiple_factors(self):
     """Removing every added factor in a single call leaves no factors on the graph."""
     self.graph.add_nodes_from(['a', 'b', 'c'])
     factors = [
         DiscreteFactor(['a', 'b'], [2, 2], range(4)),
         DiscreteFactor(['b', 'c'], [2, 2], range(4)),
     ]
     self.graph.add_factors(*factors)
     self.graph.remove_factors(*factors)
     six.assertCountEqual(self, self.graph.factors, [])
Пример #4
0
 def test_get_factors(self):
     """`get_factors` is empty before factors are added and lists exactly the added ones afterwards."""
     self.graph.add_nodes_from(['a', 'b', 'c'])
     factor_ab = DiscreteFactor(['a', 'b'], [2, 2], range(4))
     factor_bc = DiscreteFactor(['b', 'c'], [2, 2], range(4))

     # Nothing has been registered yet.
     six.assertCountEqual(self, self.graph.get_factors(), [])

     self.graph.add_factors(factor_ab, factor_bc)
     expected = [factor_ab, factor_bc]
     six.assertCountEqual(self, self.graph.get_factors(), expected)
Пример #5
0
    def _str(self, phi_or_p="phi", tablefmt="fancy_grid", print_state_names=True):
        """
        Build the table string used by the `__str__` method.

        Parameters
        ----------
        phi_or_p: 'phi' | 'p'
                'phi': When used for Factors.
                  'p': When used for CPDs.
        tablefmt: str
                Table format name forwarded to `tabulate`.
        print_state_names: boolean
                If True, the user defined state names are displayed.
        """
        scope_names = [six.text_type(var) for var in self.scope()]
        value_title = '{phi_or_p}({variables})'.format(phi_or_p=phi_or_p,
                                                       variables=','.join(scope_names))
        column_titles = scope_names + [value_title]

        show_names = self.state_names and print_state_names
        ordered_vars = list(self.variables)
        flat_values = self.values.ravel()

        rows = []
        # Assignments are enumerated in the same order as the flattened values,
        # so the running position doubles as the index into `flat_values`.
        assignments = product(*[range(card) for card in self.cardinality])
        for position, assignment in enumerate(assignments):
            if show_names:
                cells = ["{var}({state})".format(var=var,
                                                 state=self.state_names[var][assignment[k]])
                         for k, var in enumerate(ordered_vars)]
            else:
                cells = ["{s}_{d}".format(s=var, d=assignment[k])
                         for k, var in enumerate(ordered_vars)]
            cells.append(flat_values[position])
            rows.append(cells)

        return tabulate(rows, headers=column_titles, tablefmt=tablefmt, floatfmt=".4f")
Пример #6
0
 def test_partition_function(self):
     """The partition function of the two-factor chain a-b-c equals 22."""
     self.graph.add_nodes_from(['a', 'b', 'c'])
     factors = [DiscreteFactor(scope, [2, 2], range(4))
                for scope in (['a', 'b'], ['b', 'c'])]
     self.graph.add_factors(*factors)
     self.graph.add_edges_from([('a', 'b'), ('b', 'c')])

     expected = 22.0
     self.assertEqual(self.graph.get_partition_function(), expected)
Пример #7
0
    def setUp(self):
        """Create the junction tree and Bayesian model fixtures stored on the test case."""
        # Junction tree over the cliques (A,B) - (B,C) - (C,D), one factor per clique.
        clique_edges = [(('A', 'B'), ('B', 'C')),
                        (('B', 'C'), ('C', 'D'))]
        self.junction_tree = JunctionTree(clique_edges)
        clique_factors = [
            DiscreteFactor(['A', 'B'], [2, 3], range(6)),
            DiscreteFactor(['B', 'C'], [3, 2], range(6)),
            DiscreteFactor(['C', 'D'], [2, 2], range(4)),
        ]
        self.junction_tree.add_factors(*clique_factors)

        # Bayesian network: A -> J <- R, J -> Q, J -> L <- G.
        network_edges = [('A', 'J'), ('R', 'J'),
                         ('J', 'Q'), ('J', 'L'),
                         ('G', 'L')]
        self.bayesian_model = BayesianModel(network_edges)

        prior_a = TabularCPD('A', 2, values=[[0.2], [0.8]])
        prior_r = TabularCPD('R', 2, values=[[0.4], [0.6]])
        cond_j = TabularCPD(
            'J', 2,
            values=[[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
            evidence=['A', 'R'], evidence_card=[2, 2])
        cond_q = TabularCPD(
            'Q', 2,
            values=[[0.9, 0.2], [0.1, 0.8]],
            evidence=['J'], evidence_card=[2])
        cond_l = TabularCPD(
            'L', 2,
            values=[[0.9, 0.45, 0.8, 0.1],
                    [0.1, 0.55, 0.2, 0.9]],
            evidence=['J', 'G'], evidence_card=[2, 2])
        prior_g = TabularCPD('G', 2, values=[[0.6], [0.4]])

        self.bayesian_model.add_cpds(prior_a, prior_g, cond_j, cond_l, cond_q, prior_r)
Пример #8
0
 def test_factorset_divide(self):
     """Dividing one FactorSet by another keeps the dividend's factors and adds the inverse of each divisor factor."""
     phi1 = DiscreteFactor(['x1', 'x2', 'x3'], [2, 3, 2], range(1, 13))
     phi2 = DiscreteFactor(['x3', 'x4', 'x1'], [2, 2, 2], range(1, 9))
     divisor = FactorSet(phi1, phi2)

     phi3 = DiscreteFactor(['x5', 'x6', 'x7'], [2, 2, 2], range(1, 9))
     phi4 = DiscreteFactor(['x5', 'x7', 'x8'], [2, 2, 2], range(1, 9))
     dividend = FactorSet(phi3, phi4)

     quotient = dividend.divide(divisor, inplace=False)

     expected_factors = {
         phi3, phi4,
         phi1.identity_factor() / phi1,
         phi2.identity_factor() / phi2
     }
     self.assertEqual(expected_factors, quotient.factors)
Пример #9
0
    def sample(self, start_state=None, size=1):
        """
        Sample from the Markov Chain.

        The first row of the result is the chain's current state; each further
        row is obtained by resampling every variable in turn from its
        transition model, conditioned on the current states of the others.

        Parameters:
        -----------
        start_state: dict or array-like iterable
            Representing the starting states of the variables. If None is passed, a random start_state is chosen.
        size: int
            Number of samples to be generated.

        Return Type:
        ------------
        pandas.DataFrame

        Examples:
        ---------
        >>> from pgm.factors import DiscreteFactor
        >>> from pgm.inference import GibbsSampling
        >>> from pgm.models import MarkovModel
        >>> model = MarkovModel([('A', 'B'), ('C', 'B')])
        >>> factor_ab = DiscreteFactor(['A', 'B'], [2, 2], [1, 2, 3, 4])
        >>> factor_cb = DiscreteFactor(['C', 'B'], [2, 2], [5, 6, 7, 8])
        >>> model.add_factors(factor_ab, factor_cb)
        >>> gibbs = GibbsSampling(model)
        >>> gibbs.sample(size=4)
           A  B  C
        0  0  1  1
        1  1  0  0
        2  1  1  0
        3  1  1  1
        """
        # An explicit start state always wins; otherwise keep the current
        # state and only draw a random one when none has been set yet.
        if start_state is not None:
            self.set_start_state(start_state)
        elif self.state is None:
            self.state = self.random_state()

        draws = DataFrame(index=range(size), columns=self.variables)
        draws.loc[0] = [value for _, value in self.state]

        for row in range(1, size):
            for position, (var, _) in enumerate(self.state):
                # The transition model is keyed by the states of all other variables.
                context = tuple(value for other, value in self.state if var != other)
                candidates = list(range(self.cardinalities[var]))
                weights = self.transition_models[var][context]
                drawn = sample_discrete(candidates, weights)[0]
                self.state[position] = State(var, drawn)
            draws.loc[row] = [value for _, value in self.state]

        return draws
Пример #10
0
    def get_independencies(self, latex=False):
        """
        Computes independencies in the Bayesian Network, by checking d-separation.

        For every node and every proper subset of the remaining nodes taken as
        observed, the nodes that are neither observed nor reachable through an
        active trail are recorded as independent of the node.

        Parameters
        ----------
        latex: boolean
            If latex=True then latex string of the independence assertion
            would be created.

        Examples
        --------
        >>> from pgm.models import BayesianModel
        >>> chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        >>> chain.get_independencies()
        (X _|_ Z | Y)
        (Z _|_ X | Y)
        """
        assertions = Independencies()

        for source in self.nodes():
            others = set(self.nodes()) - {source}
            for n_observed in range(len(others)):
                for given in itertools.combinations(others, n_observed):
                    unobserved = others - set(given)
                    separated = unobserved - set(
                        self.active_trail_nodes(source, observed=given))
                    if not separated:
                        continue
                    assertions.add_assertions([source, separated, given])

        assertions.reduce()

        return assertions.latex_string() if latex else assertions
Пример #11
0
    def __eq__(self, other):
        """
        Compare two factors for equality irrespective of variable order.

        Two factors are equal when both are `DiscreteFactor` instances, have
        the same scope and, after the axes of a copy of `other` are permuted
        to follow this factor's variable order, have the same shape, values
        (within `np.allclose` tolerance) and cardinalities.
        """
        both_factors = isinstance(self, DiscreteFactor) and isinstance(other, DiscreteFactor)
        if not both_factors:
            return False

        if set(self.scope()) != set(other.scope()):
            return False

        # Work on a copy so that `other` is left untouched by the axis swaps.
        aligned = other.copy()
        for axis in range(self.values.ndim):
            source_axis = aligned.variables.index(self.variables[axis])
            aligned.variables[axis], aligned.variables[source_axis] = (
                aligned.variables[source_axis], aligned.variables[axis])
            aligned.cardinality[axis], aligned.cardinality[source_axis] = (
                aligned.cardinality[source_axis], aligned.cardinality[axis])
            aligned.values = aligned.values.swapaxes(axis, source_axis)

        # The shape test comes first so np.allclose never sees mismatched shapes.
        return bool(aligned.values.shape == self.values.shape and
                    np.allclose(aligned.values, self.values) and
                    all(self.cardinality == aligned.cardinality))
Пример #12
0
    def get_edges(self):
        """
        Returns the edges of the network.

        For a 'BAYES' network every function contributes edges pairing the
        last variable of its scope with each of the other scope variables, as
        `(last_variable, other_variable)` tuples. For a 'MARKOV' network every
        function contributes all pairwise combinations of its scope variables.
        Any other network type contributes no edges.

        Returns
        -------
        set: set containing the edges of the network

        Example
        -------
        >>> reader = UAIReader('TestUAI.uai')
        >>> reader.get_edges()
        {('var_0', 'var_1'), ('var_0', 'var_2'), ('var_1', 'var_2')}
        """
        if self.no_functions <= 0:
            return set()

        # The parse result does not depend on the function index, so parse the
        # network once instead of once per function.
        parsed = self.grammar.parseString(self.network)

        edges = set()
        for function in range(self.no_functions):
            scope = parsed['fun_' + str(function)]
            # A single-variable scope may come back as a bare int.
            if isinstance(scope, int):
                scope = [scope]
            if self.network_type == 'BAYES':
                child_var = "var_" + str(scope[-1])
                edges.update(
                    (child_var, "var_" + str(var)) for var in scope[:-1])
            elif self.network_type == "MARKOV":
                names = ["var_" + str(var) for var in scope]
                edges.update(combinations(names, 2))
        return edges
Пример #13
0
    def get_partition_function(self):
        r"""
        Returns the partition function for a given undirected graph.

        A partition function is defined as

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        where m is the number of factors present in the graph
        and X are all the random variables present.

        Raises
        ------
        IndexError: if the graph has no factors.
        ValueError: if the scope of the product of all factors differs from
            the graph's variable nodes.

        Examples
        --------
        >>> from pgm.models import FactorGraph
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> G = FactorGraph()
        >>> G.add_nodes_from(['a', 'b', 'c'])
        >>> phi1 = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4))
        >>> phi2 = DiscreteFactor(['b', 'c'], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2)
        >>> G.add_nodes_from([phi1, phi2])
        >>> G.add_edges_from([('a', phi1), ('b', phi1),
        ...                   ('b', phi2), ('c', phi2)])
        >>> G.get_factors()
        >>> G.get_partition_function()
        """
        # NOTE: the docstring is a raw string because the math directive
        # contains backslashes that are not valid escape sequences.
        factors = self.factors
        joint = factor_product(factors[0], *factors[1:])
        if set(joint.scope()) != set(self.get_variable_nodes()):
            raise ValueError(
                'DiscreteFactor for all the random variables not defined.')

        return np.sum(joint.values)
Пример #14
0
    def _get_kernel_from_bayesian_model(self, model):
        """
        Computes the Gibbs transition models from a Bayesian Network.
        'Probabilistic Graphical Model Principles and Techniques', Koller and
        Friedman, Section 12.3.3 pp 512-513.

        For each variable, the CPDs whose scope contains it are multiplied
        together; for every assignment of the remaining variables the product
        is reduced to that assignment and normalised, and the result is stored
        in `self.transition_models[variable][assignment]`.

        Parameters:
        -----------
        model: BayesianModel
            The model from which probabilities will be computed.
        """
        self.variables = np.array(model.nodes())
        self.cardinalities = {
            name: model.get_cpds(name).variable_card for name in self.variables
        }

        for target in self.variables:
            others = [name for name in self.variables if target != name]
            other_ranges = [range(self.cardinalities[name]) for name in others]

            # Only CPDs mentioning the target influence its conditional.
            relevant = [cpd for cpd in model.cpds if target in cpd.scope()]
            joint = factor_product(*relevant)
            joint_scope = set(joint.scope())

            kernel = {}
            for assignment in itertools.product(*other_ranges):
                # Variables outside the product's scope cannot be reduced on.
                evidence = [
                    State(name, value)
                    for name, value in zip(others, assignment)
                    if name in joint_scope
                ]
                conditional = joint.reduce(evidence, inplace=False)
                kernel[assignment] = conditional.values / sum(conditional.values)
            self.transition_models[target] = kernel
Пример #15
0
    def get_partition_function(self):
        r"""
        Returns the partition function for a given undirected graph.

        A partition function is defined as

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        where m is the number of factors present in the graph
        and X are all the random variables present.

        Returns None when `check_model()` returns a falsy value.

        Examples
        --------
        >>> from pgm.models import ClusterGraph
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([('a', 'b', 'c'), ('a', 'b'), ('a', 'c')])
        >>> G.add_edges_from([(('a', 'b', 'c'), ('a', 'b')),
        ...                   (('a', 'b', 'c'), ('a', 'c'))])
        >>> phi1 = DiscreteFactor(['a', 'b', 'c'], [2, 2, 2], np.random.rand(8))
        >>> phi2 = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4))
        >>> phi3 = DiscreteFactor(['a', 'c'], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2, phi3)
        >>> G.get_partition_function()
        """
        if not self.check_model():
            return None

        first = self.factors[0]
        remaining = [self.factors[k] for k in range(1, len(self.factors))]
        joint = factor_product(first, *remaining)
        return np.sum(joint.values)
Пример #16
0
    def get_partition_function(self):
        r"""
        Returns the partition function for a given undirected graph.

        A partition function is defined as

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        where m is the number of factors present in the graph
        and X are all the random variables present.

        Raises
        ------
        IndexError: if the model has no factors.
        ValueError: if the scope of the product of all factors differs from
            the model's nodes.

        Examples
        --------
        >>> from pgm.models import MarkovModel
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> G = MarkovModel()
        >>> G.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> G.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                   ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                   ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in G.edges()]
        >>> G.add_factors(*phi)
        >>> G.get_partition_function()
        """
        # NOTE: the docstring is a raw string because the math directive
        # contains backslashes that are not valid escape sequences.
        self.check_model()

        factors = self.factors
        joint = factor_product(factors[0], *factors[1:])
        if set(joint.scope()) != set(self.nodes()):
            raise ValueError(
                'DiscreteFactor for all the random variables not defined.')

        return np.sum(joint.values)
Пример #17
0
    def set_distributions(self):
        """
        Set distributions in the network.

        Adds a 'DISTRIBUTIONS' element under `self.bnmodel` containing one
        'DIST' element per CPD of `self.model`, ordered by CPD variable.
        CPDs with evidence get a 'CONDSET' listing the evidence variables and
        one 'DPI' element per consecutive pair of flattened CPD values; CPDs
        without evidence get a single 'DPI' holding all values.

        Examples
        --------
        >>> from pgm.readwrite.XMLBeliefNetwork import XBNWriter
        >>> writer =XBNWriter()
        >>> writer.set_distributions()
        """
        distributions = etree.SubElement(self.bnmodel, 'DISTRIBUTIONS')

        # sorted() builds a new list, so the list handed out by the model is
        # not reordered as a side effect of writing.
        cpds = sorted(self.model.get_cpds(), key=lambda cpd: cpd.variable)
        for cpd in cpds:
            cpd_values = cpd.values.ravel()
            var = cpd.variable
            dist = etree.SubElement(distributions, 'DIST', attrib={'TYPE': self.model.node[var]['TYPE']})
            etree.SubElement(dist, 'PRIVATE', attrib={'NAME': var})
            dpis = etree.SubElement(dist, 'DPIS')
            # Every variable of the CPD except the first one, in reverse order.
            evidence = cpd.variables[:0:-1]
            if evidence:
                condset = etree.SubElement(dist, 'CONDSET')
                for condelem in sorted(evidence):
                    etree.SubElement(condset, 'CONDELEM', attrib={'NAME': condelem})
                # TODO: Get Index value.
                # NOTE(review): values are written two at a time, which
                # presumably assumes binary variables - confirm.
                for val in range(0, len(cpd_values), 2):
                    etree.SubElement(dpis, "DPI", attrib={'INDEXES': ' '}).text = \
                        " " + str(cpd_values[val]) + " " + str(cpd_values[val+1]) + " "
            else:
                etree.SubElement(dpis, "DPI").text = ' ' + ' '.join(map(str, cpd_values))
Пример #18
0
    def get_states(self):
        """
        Build the state labels of every variable that has a CPD in the model.

        Each state is labelled `<variable>_<index>` with the index running
        from 0 up to the variable's cardinality minus one.

        Returns
        -------
        dict: dict of type {variable: a list of states}

        Example
        -------
        >>> from pgm.readwrite import BIFReader, BIFWriter
        >>> model = BIFReader('dog-problem.bif').get_model()
        >>> writer = BIFWriter(model)
        >>> writer.get_states()
        {'bowel-problem': ['bowel-problem_0', 'bowel-problem_1'],
         'dog-out': ['dog-out_0', 'dog-out_1'],
         'family-out': ['family-out_0', 'family-out_1'],
         'hear-bark': ['hear-bark_0', 'hear-bark_1'],
         'light-on': ['light-on_0', 'light-on_1']}
        """
        states_by_variable = {}
        for cpd in self.model.get_cpds():
            name = cpd.variable
            n_states = cpd.get_cardinality([name])[name]
            states_by_variable[name] = [
                str(name) + '_' + str(index) for index in range(n_states)
            ]
        return states_by_variable
Пример #19
0
    def forward_sample(self, size=1):
        """
        Generates sample(s) from joint distribution of the bayesian network.

        Nodes are visited in `self.topological_order`; a node without evidence
        is sampled from its CPD values, a node with evidence is sampled row by
        row using the weights looked up for the already sampled evidence
        values.

        Parameters
        ----------
        size: int
            size of sample to be generated

        Returns
        -------
        sampled: pandas.DataFrame
            the generated samples

        Examples
        --------
        >>> from pgm.models.BayesianModel import BayesianModel
        >>> from pgm.factors.discrete import TabularCPD
        >>> from pgm.sampling import BayesianModelSampling
        >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
        >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
        >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
        ...                0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
        ...                ['intel', 'diff'], [2, 2])
        >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
        >>> inference = BayesianModelSampling(student)
        >>> inference.forward_sample(2)
                diff       intel       grade
        0        1           0          1
        1        1           0          2
        """
        sampled = DataFrame(index=range(size), columns=self.topological_order)
        for node in self.topological_order:
            cpd = self.model.get_cpds(node)
            states = range(self.cardinality[node])
            # Every variable of the CPD except the first one, in reverse order.
            evidence = cpd.variables[:0:-1]
            if evidence:
                cached_values = self.pre_compute_reduce(variable=node)
                # Label-based column selection; `.ix` is no longer available
                # in current pandas releases.
                evidence_values = sampled.loc[:, evidence].values
                weights = [cached_values[tuple(row)] for row in evidence_values]
            else:
                weights = cpd.values
            sampled[node] = sample_discrete(states, weights, size)
        return sampled
Пример #20
0
 def _find_common_cliques(cliques_list):
     """
     Finds the common cliques among the given set of cliques for
     corresponding node.
     """
     common = set([tuple(x) for x in cliques_list[0]])
     for i in range(1, len(cliques_list)):
         common = common & set([tuple(x) for x in cliques_list[i]])
     return list(common)
Пример #21
0
    def to_bayesian_model(self):
        """
        Creates a Bayesian Model which is a minimum I-Map for this markov model.

        The ordering of parents may not remain constant. It would depend on the
        ordering of variable in the junction tree (which is not constant) all the
        time.

        Only the graph structure is produced; factors are not converted into
        CPDs.

        Examples
        --------
        >>> from pgm.models import MarkovModel
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> mm = MarkovModel()
        >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                    ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
        >>> mm.add_factors(*phi)
        >>> bm = mm.to_bayesian_model()
        """
        from pgm.models import BayesianModel

        bm = BayesianModel()
        # Maps each variable to the first clique it was seen in; the empty
        # tuple default marks variables that have not been seen yet.
        var_clique_dict = defaultdict(tuple)
        var_order = []

        # Create a junction tree from the markov model.
        # Creation of clique tree involves triangulation, finding maximal cliques
        # and creating a tree from these cliques
        junction_tree = self.to_junction_tree()

        # create an ordering of the nodes based on the ordering of the clique
        # in which it appeared first
        # Materialise the nodes before indexing: depending on the networkx
        # version, nodes() is either a list or a view keyed by node.
        root_node = list(junction_tree.nodes())[0]
        bfs_edges = nx.bfs_edges(junction_tree, root_node)
        for node in root_node:
            var_clique_dict[node] = root_node
            var_order.append(node)
        for edge in bfs_edges:
            clique_node = edge[1]
            for node in clique_node:
                if not var_clique_dict[node]:
                    var_clique_dict[node] = clique_node
                    var_order.append(node)

        # create a bayesian model by adding edges from parent of node to node as
        # par(x_i) = (var(c_k) - x_i) \cap {x_1, ..., x_{i-1}}
        for node_index, node in enumerate(var_order):
            node_parents = (set(var_clique_dict[node]) -
                            set([node])).intersection(
                                set(var_order[:node_index]))
            bm.add_edges_from([(parent, node) for parent in node_parents])
            # TODO : Convert factor into CPDs
        return bm
Пример #22
0
    def reduce(self, values, inplace=True):
        """
        Reduces the factor to the context of given variable values.

        Parameters
        ----------
        values: list, array-like
            A list of tuples of the form (variable_name, variable_state).

        inplace: boolean
            If inplace=True it will modify the factor itself, else would return
            a new factor.

        Returns
        -------
        DiscreteFactor or None: if inplace=True (default) returns None
                        if inplace=False returns a new `DiscreteFactor` instance.

        Raises
        ------
        TypeError: if `values` is a string, contains a string element, or
            contains a state that is not an integer.

        Examples
        --------
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> phi = DiscreteFactor(['x1', 'x2', 'x3'], [2, 3, 2], range(12))
        >>> phi.reduce([('x1', 0), ('x2', 0)])
        >>> phi.variables
        ['x3']
        >>> phi.cardinality
        array([2])
        >>> phi.values
        array([0., 1.])
        """
        if isinstance(values, six.string_types):
            raise TypeError("values: Expected type list or array-like, got type str")

        has_string_item = any(isinstance(item, six.string_types) for item in values)
        if has_string_item or not all(
                isinstance(state, (int, np.integer)) for _, state in values):
            raise TypeError("values: must contain tuples or array-like elements of the form "
                            "(hashable object, type int)")

        phi = self if inplace else self.copy()

        # Start from "take everything" on each axis and pin the reduced axes
        # to the requested state.
        indexer = [slice(None)] * len(self.variables)
        dropped = set()
        for var, state in values:
            position = phi.variables.index(var)
            indexer[position] = state
            dropped.add(position)

        # Surviving axes, in their original order.
        kept = [axis for axis in range(len(phi.variables)) if axis not in dropped]
        phi.variables = [phi.variables[axis] for axis in kept]
        phi.cardinality = phi.cardinality[kept]
        phi.values = phi.values[tuple(indexer)]

        if inplace:
            return None
        return phi
Пример #23
0
    def _tighten_triplet(self, max_iterations, later_iter, max_triplets, prolong):
        """
        This method finds all the triplets that are eligible and adds them iteratively in the bunch of max_triplets

        Parameters
        ----------
        max_iterations: integer
                        Maximum number of times we tighten the relaxation

        later_iter: integer
                    Number of maximum iterations that we want MPLP to run. This is lesser than the initial number
                    of iterations.

        max_triplets: integer
                      Maximum number of triplets that can be added atmost in one iteration.

        prolong: bool
                It sets the continuation of tightening after all the triplets are exhausted
        """
        # Find all the triplets that are possible in the present model
        triangles = self.find_triangles()
        # Evaluate scores for each of the triplets found above
        triplet_scores = self._get_triplet_scores(triangles)
        # Arrange the keys on the basis of increasing order of the values of the dict. triplet_scores
        sorted_scores = sorted(triplet_scores, key=triplet_scores.get)
        for niter in range(max_iterations):
            if self._is_converged(integrality_gap_threshold=self.integrality_gap_threshold):
                break
            # add triplets that are yet not added.
            add_triplets = []
            for triplet_number in (range(len(sorted_scores))):
                # At once, we can add atmost 5 triplets
                if triplet_number >= max_triplets:
                    break
                add_triplets.append(sorted_scores.pop())
            # Break from the tighten triplets loop if there are no triplets to add if the prolong is set to False
            if not add_triplets and prolong is False:
                    break
            # Update the eligible triplets to tighten the relaxation
            self._update_triangles(add_triplets)
            # Run MPLP for a maximum of later_iter times.
            self._run_mplp(later_iter)
Пример #24
0
    def maximize(self, variables, inplace=True):
        """
        Maximizes the factor with respect to `variables`.

        Parameters
        ----------
        variables: list, array-like
            List of variables with respect to which factor is to be maximized

        inplace: boolean
            If inplace=True it will modify the factor itself, else would return
            a new factor.

        Returns
        -------
        DiscreteFactor or None: if inplace=True (default) returns None
                        if inplace=False returns a new `DiscreteFactor` instance.

        Raises
        ------
        TypeError: if `variables` is a string.
        ValueError: if any of `variables` is not in the factor's scope.

        Examples
        --------
        >>> from pgm.factors.discrete import DiscreteFactor
        >>> phi = DiscreteFactor(['x1', 'x2', 'x3'], [3, 2, 2], [0.25, 0.35, 0.08, 0.16, 0.05, 0.07,
        ...                                              0.00, 0.00, 0.15, 0.21, 0.09, 0.18])
        >>> phi.variables
        ['x1','x2','x3']
        >>> phi.maximize(['x2'])
        >>> phi.variables
        ['x1', 'x3']
        >>> phi.cardinality
        array([3, 2])
        >>> phi.values
        array([[ 0.25,  0.35],
               [ 0.05,  0.07],
               [ 0.15,  0.21]])
        """
        if isinstance(variables, six.string_types):
            raise TypeError("variables: Expected type list or array-like, got type str")

        phi = self if inplace else self.copy()

        # Validate the whole request before touching the factor.
        for var in variables:
            if var not in phi.variables:
                raise ValueError("{var} not in scope.".format(var=var))

        axes = tuple(phi.variables.index(var) for var in variables)

        # Axes that survive the maximisation, in their original order.
        kept = [axis for axis in range(len(self.variables)) if axis not in axes]
        phi.variables = [phi.variables[axis] for axis in kept]
        phi.cardinality = phi.cardinality[kept]

        phi.values = np.max(phi.values, axis=axes)

        if inplace:
            return None
        return phi
Пример #25
0
    def get_grammar(self):
        """
        Build and return the grammar for the UAI network text in `self.network`.

        The grammar is assembled incrementally: each count (number of
        variables, number of functions, and the scope size and value count of
        every function) is read by parsing `self.network` with the grammar
        built so far, and that count then fixes how many tokens the next
        piece of the grammar must match.

        As a side effect, `self.no_variables` and `self.no_functions` are set
        to the parsed integer counts.
        """
        grammar = (Word(alphas).setResultsName('network_name') +
                   Word(nums).setResultsName('no_variables'))
        self.no_variables = int(
            grammar.parseString(self.network)['no_variables'])

        # One numeric token per variable, followed by the function count.
        grammar += (Word(nums) * self.no_variables).setResultsName(
            'domain_variables')
        grammar += Word(nums).setResultsName('no_functions')
        self.no_functions = int(
            grammar.parseString(self.network)['no_functions'])

        # Function scopes: a size token followed by that many integers.
        as_int = Word(nums).setParseAction(lambda tokens: int(tokens[0]))
        for index in range(self.no_functions):
            scope_key = 'fun_scope_' + str(index)
            grammar += Word(nums).setResultsName(scope_key)
            scope_size = int(grammar.parseString(self.network)[scope_key])
            grammar += (as_int * scope_size).setResultsName(
                'fun_' + str(index))

        # Function tables: a count token followed by that many numbers
        # (digits with an optional fractional part).
        decimal = Combine(
            Word(nums) + Optional(Literal(".") + Optional(Word(nums))))
        for index in range(self.no_functions):
            count_key = 'fun_no_values_' + str(index)
            grammar += Word(nums).setResultsName(count_key)
            value_count = int(grammar.parseString(self.network)[count_key])
            grammar += (decimal * value_count).setResultsName(
                'fun_values_' + str(index))

        return grammar
Пример #26
0
 def __hash__(self):
     """
     Hash the factor from a copy whose axes are arranged in ascending order
     of the variables' own hashes.

     The result combines the sorted variable hashes with the string forms
     of the rearranged values and cardinality. The rearrangement is done on
     `self.copy()`; `self` is not assigned to here.
     """
     current_order = [hash(variable) for variable in self.variables]
     canonical_order = sorted(current_order)
     work = self.copy()

     # Selection-sort the axes: at each position, swap in the axis whose
     # variable hash belongs there, keeping cardinality and values in step.
     for axis in range(work.values.ndim):
         source = current_order.index(canonical_order[axis])
         current_order[axis], current_order[source] = (
             current_order[source], current_order[axis])
         work.cardinality[axis], work.cardinality[source] = (
             work.cardinality[source], work.cardinality[axis])
         work.values = work.values.swapaxes(axis, source)

     fingerprint = (str(canonical_order) + str(work.values) +
                    str(work.cardinality))
     return hash(fingerprint)
Пример #27
0
 def test_get_model(self):
     """
     Check the model returned by `self.reader.get_model()`: its CPD tables,
     the node and edge attribute dictionaries, and the node and edge lists.
     """
     expected_nodes = [
         'bowel-problem', 'hear-bark', 'light-on', 'dog-out', 'family-out'
     ]
     expected_edges = [
         ('family-out', 'dog-out'),
         ('bowel-problem', 'dog-out'),
         ('family-out', 'light-on'),
         ('dog-out', 'hear-bark'),
     ]
     expected_edge_attrs = {
         'bowel-problem': {'dog-out': {}},
         'dog-out': {'hear-bark': {}},
         'family-out': {'dog-out': {}, 'light-on': {}},
         'hear-bark': {},
         'light-on': {},
     }
     expected_node_attrs = {
         name: {'position': position}
         for name, position in [
             ('bowel-problem', '(335, 99)'),
             ('dog-out', '(300, 195)'),
             ('family-out', '(257, 99)'),
             ('hear-bark', '(296, 268)'),
             ('light-on', '(218, 195)'),
         ]
     }
     expected_cpd_tables = [
         np.array([[0.01], [0.99]]),
         np.array([[0.99, 0.97, 0.9, 0.3], [0.01, 0.03, 0.1, 0.7]]),
         np.array([[0.15], [0.85]]),
         np.array([[0.7, 0.01], [0.3, 0.99]]),
         np.array([[0.6, 0.05], [0.4, 0.95]]),
     ]

     model = self.reader.get_model()

     # CPDs are compared position by position with the expected tables.
     for position, expected_table in enumerate(expected_cpd_tables):
         np_test.assert_array_equal(
             model.get_cpds()[position].get_cpd(), expected_table)

     self.assertDictEqual(model.node, expected_node_attrs)
     self.assertDictEqual(model.edge, expected_edge_attrs)
     # Node and edge order is not significant, so compare sorted lists.
     self.assertListEqual(sorted(model.nodes()), sorted(expected_nodes))
     self.assertListEqual(sorted(model.edges()), sorted(expected_edges))
Пример #28
0
    def pre_compute_reduce(self, variable):
        """
        Precompute the reduced CPD values of `variable` for every combination
        of states of the other variables in its CPD.

        The other variables are every entry of the CPD's `variables` except
        the first, taken in reverse order (presumably the evidence variables
        -- confirm against the CPD class).

        Returns
        -------
        dict: maps each tuple of state indices (one per such variable, in
              that reversed order) to the `.values` of the CPD reduced to
              those states with inplace=False.
        """
        cpd = self.model.get_cpds(variable)
        others = cpd.variables[:0:-1]
        state_ranges = [range(self.cardinality[name]) for name in others]

        return {
            combination: cpd.reduce(
                list(zip(others, combination)), inplace=False).values
            for combination in itertools.product(*state_ranges)
        }
Пример #29
0
    def test_max_calibrate_sepset_belief(self):
        """
        After `max_calibrate()`, the sepset beliefs of `self.junction_tree`
        should match beliefs computed by hand from three pairwise factors
        using products and maximization.

        NOTE(review): presumably `self.junction_tree` is built from these
        same factors in the fixture -- confirm against setUp.
        """
        propagation = BeliefPropagation(self.junction_tree)
        propagation.max_calibrate()
        beliefs = propagation.get_sepset_beliefs()

        phi_ab = DiscreteFactor(['A', 'B'], [2, 3], range(6))
        phi_bc = DiscreteFactor(['B', 'C'], [3, 2], range(6))
        phi_cd = DiscreteFactor(['C', 'D'], [2, 2], range(4))

        # Expected belief on the sepset between (A, B) and (B, C).
        without_d = phi_cd.maximize(['D'], inplace=False)
        without_c = (without_d * phi_bc).maximize(['C'], inplace=False)
        expected_b = (phi_ab * without_c).maximize(['A'], inplace=False)

        # Expected belief on the sepset between (B, C) and (C, D).
        outer = (phi_ab.maximize(['A'], inplace=False) *
                 phi_cd.maximize(['D'], inplace=False))
        expected_c = (phi_bc * outer).maximize(['B'], inplace=False)

        checks = [
            ((('A', 'B'), ('B', 'C')), expected_b),
            ((('B', 'C'), ('C', 'D')), expected_c),
        ]
        for cliques, expected in checks:
            np_test.assert_array_almost_equal(
                beliefs[frozenset(cliques)].values, expected.values)
Пример #30
0
    def setUp(self):
        self.variables = ['intel', 'diff', 'grade']
        self.card = [3, 2, 3]
        self.cardinalities = {'intel': 3, 'diff': 2, 'grade': 3}
        self.intel_tm = {0: {0: 0.1, 1: 0.25, 2: 0.65}, 1: {0: 0.5, 1: 0.3, 2: 0.2}, 2: {0: 0.3, 1: 0.3, 2: 0.4}}
        self.diff_tm = {0: {0: 0.3, 1: 0.7}, 1: {0: 0.75, 1: 0.25}}
        self.grade_tm = {0: {0: 0.4, 1: 0.2, 2: 0.4}, 1: {0: 0.9, 1: 0.05, 2: 0.05}, 2: {0: 0.1, 1: 0.4, 2: 0.5}}
        self.start_state = [State('intel', 0), State('diff', 1), State('grade', 2)]
        self.model = MC()

        self.sample = DataFrame(index=range(200), columns=['a', 'b'])
        self.sample.a = [1] * 100 + [0] * 100
        self.sample.b = [0] * 100 + [1] * 100