Exemplo n.º 1
0
    def generate_sample(self, start_state=None, size=1):
        """
        Generator version of self.sample

        Return Type:
        ------------
        List of State namedtuples, representing the assignment to all variables of the model.

        Examples:
        ---------
        >>> from pgmpy.factors import Factor
        >>> from pgmpy.inference import GibbsSampling
        >>> from pgmpy.models import MarkovModel
        >>> model = MarkovModel([('A', 'B'), ('C', 'B')])
        >>> factor_ab = Factor(['A', 'B'], [2, 2], [1, 2, 3, 4])
        >>> factor_cb = Factor(['C', 'B'], [2, 2], [5, 6, 7, 8])
        >>> model.add_factors(factor_ab, factor_cb)
        >>> gibbs = GibbsSampling(model)
        >>> gen = gibbs.generate_sample(size=2)
        >>> [sample for sample in gen]
        [[State(var='C', state=1), State(var='B', state=1), State(var='A', state=0)],
         [State(var='C', state=0), State(var='B', state=1), State(var='A', state=1)]]
        """
        if start_state is None and self.state is None:
            self.state = self.random_state()
        else:
            self.set_start_state(start_state)

        for i in range(size):
            for j, (var, st) in enumerate(self.state):
                other_st = tuple(st for v, st in self.state if var != v)
                next_st = sample_discrete(list(range(self.cardinalities[var])),
                                          self.transition_models[var][other_st])[0]
                self.state[j] = State(var, next_st)
            yield self.state[:]
Exemplo n.º 2
0
    def forward_sample(self, size=1):
        """
        Generates sample(s) from joint distribution of the bayesian network.

        Parameters
        ----------
        size: int
            size of sample to be generated

        Returns
        -------
        sampled: pandas.DataFrame
            the generated samples

        Examples
        --------
        >>> from pgmpy.models.BayesianModel import BayesianModel
        >>> from pgmpy.factors.CPD import TabularCPD
        >>> from pgmpy.inference.Sampling import BayesianModelSampling
        >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
        >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
        >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
        ...                0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
        ...                ['intel', 'diff'], [2, 2])
        >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
        >>> inference = BayesianModelSampling(student)
        >>> inference.forward_sample(2)
                diff       intel       grade
        0  (diff, 1)  (intel, 0)  (grade, 1)
        1  (diff, 1)  (intel, 0)  (grade, 2)
        """
        sampled = DataFrame(index=range(size), columns=self.topological_order)
        for node in self.topological_order:
            cpd = self.cpds[node]
            states = [st for var, st in cpd.variables[node]]
            if cpd.evidence:
                indices = [i for i, x in enumerate(self.topological_order) if x in cpd.evidence]
                evidence = sampled.values[:, [indices]].tolist()
                weights = list(map(lambda t: cpd.reduce(t[0], inplace=False).values, evidence))
                sampled[node] = list(map(lambda t: State(node, t), sample_discrete(states, weights)))
            else:
                sampled[node] = list(map(lambda t: State(node, t),
                                     sample_discrete(states, cpd.values, size)))
        return sampled
    def forward_sample(self, size=1, return_type="dataframe"):
        """
        Generates sample(s) from joint distribution of the bayesian network.

        Parameters
        ----------
        size: int
            size of sample to be generated

        return_type: string (dataframe | recarray)
            Return type for samples, either of 'dataframe' or 'recarray'.
            Defaults to 'dataframe'

        Returns
        -------
        sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
            the generated samples


        Examples
        --------
        >>> from pgmpy.models.BayesianModel import BayesianModel
        >>> from pgmpy.factors.discrete import TabularCPD
        >>> from pgmpy.sampling import BayesianModelSampling
        >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
        >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
        >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
        ...                0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
        ...                ['intel', 'diff'], [2, 2])
        >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
        >>> inference = BayesianModelSampling(student)
        >>> inference.forward_sample(size=2, return_type='recarray')
        rec.array([(0, 0, 1), (1, 0, 2)], dtype=
                  [('diff', '<i8'), ('intel', '<i8'), ('grade', '<i8')])
        """
        types = [(var_name, "int") for var_name in self.topological_order]
        sampled = np.zeros(size, dtype=types).view(np.recarray)

        pbar = tqdm(self.topological_order)
        for node in pbar:
            pbar.set_description(
                "Generating for node: {node}".format(node=node))
            cpd = self.model.get_cpds(node)
            states = range(self.cardinality[node])
            evidence = cpd.variables[:0:-1]
            if evidence:
                cached_values = self.pre_compute_reduce(variable=node)
                evidence = np.vstack([sampled[i] for i in evidence])
                weights = list(
                    map(lambda t: cached_values[tuple(t)], evidence.T))
            else:
                weights = cpd.values
            sampled[node] = sample_discrete(states, weights, size)

        return _return_samples(return_type, sampled)
    def sample(self, start_state=None, size=1, return_type="dataframe"):
        """
        Sample from the Markov Chain.

        Parameters
        ----------
        start_state: dict or array-like iterable
            Representing the starting states of the variables. If None is passed, a random start_state is chosen.
        size: int
            Number of samples to be generated.
        return_type: string (dataframe | recarray)
            Return type for samples, either of 'dataframe' or 'recarray'.
            Defaults to 'dataframe'

        Returns
        -------
        sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
            the generated samples

        Examples
        --------
        >>> from pgmpy.factors import DiscreteFactor
        >>> from pgmpy.sampling import GibbsSampling
        >>> from pgmpy.models import MarkovModel
        >>> model = MarkovModel([('A', 'B'), ('C', 'B')])
        >>> factor_ab = DiscreteFactor(['A', 'B'], [2, 2], [1, 2, 3, 4])
        >>> factor_cb = DiscreteFactor(['C', 'B'], [2, 2], [5, 6, 7, 8])
        >>> model.add_factors(factor_ab, factor_cb)
        >>> gibbs = GibbsSampling(model)
        >>> gibbs.sample(size=4, return_tupe='dataframe')
           A  B  C
        0  0  1  1
        1  1  0  0
        2  1  1  0
        3  1  1  1
        """
        if start_state is None and self.state is None:
            self.state = self.random_state()
        elif start_state is not None:
            self.set_start_state(start_state)

        types = [(var_name, "int") for var_name in self.variables]
        sampled = np.zeros(size, dtype=types).view(np.recarray)
        sampled[0] = tuple([st for var, st in self.state])
        for i in tqdm(range(size - 1)):
            for j, (var, st) in enumerate(self.state):
                other_st = tuple(st for v, st in self.state if var != v)
                next_st = sample_discrete(
                    list(range(self.cardinalities[var])),
                    self.transition_models[var][other_st],
                )[0]
                self.state[j] = State(var, next_st)
            sampled[i + 1] = tuple([st for var, st in self.state])

        return _return_samples(return_type, sampled)
Exemplo n.º 5
0
    def sample(self, start_state=None, size=1, return_type="dataframe"):
        """
        Sample from the Markov Chain.

        Parameters:
        -----------
        start_state: dict or array-like iterable
            Representing the starting states of the variables. If None is passed, a random start_state is chosen.
        size: int
            Number of samples to be generated.
        return_type: string (dataframe | recarray)
            Return type for samples, either of 'dataframe' or 'recarray'.
            Defaults to 'dataframe'

        Returns
        -------
        sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
            the generated samples

        Examples:
        ---------
        >>> from pgmpy.factors import DiscreteFactor
        >>> from pgmpy.inference import GibbsSampling
        >>> from pgmpy.models import MarkovModel
        >>> model = MarkovModel([('A', 'B'), ('C', 'B')])
        >>> factor_ab = DiscreteFactor(['A', 'B'], [2, 2], [1, 2, 3, 4])
        >>> factor_cb = DiscreteFactor(['C', 'B'], [2, 2], [5, 6, 7, 8])
        >>> model.add_factors(factor_ab, factor_cb)
        >>> gibbs = GibbsSampling(model)
        >>> gibbs.sample(size=4, return_tupe='dataframe')
           A  B  C
        0  0  1  1
        1  1  0  0
        2  1  1  0
        3  1  1  1
        """
        if start_state is None and self.state is None:
            self.state = self.random_state()
        elif start_state is not None:
            self.set_start_state(start_state)

        types = [(var_name, 'int') for var_name in self.variables]
        sampled = np.zeros(size, dtype=types).view(np.recarray)
        sampled[0] = np.array([st for var, st in self.state])
        for i in range(size - 1):
            for j, (var, st) in enumerate(self.state):
                other_st = tuple(st for v, st in self.state if var != v)
                next_st = sample_discrete(list(range(self.cardinalities[var])),
                                          self.transition_models[var][other_st])[0]
                self.state[j] = State(var, next_st)
            sampled[i + 1] = np.array([st for var, st in self.state])

        return _return_samples(return_type, sampled)
Exemplo n.º 6
0
    def forward_sample(self, size=1, return_type='dataframe'):
        """
        Generates sample(s) from joint distribution of the bayesian network.

        Parameters
        ----------
        size: int
            size of sample to be generated

        return_type: string (dataframe | recarray)
            Return type for samples, either of 'dataframe' or 'recarray'.
            Defaults to 'dataframe'

        Returns
        -------
        sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
            the generated samples


        Examples
        --------
        >>> from pgmpy.models.BayesianModel import BayesianModel
        >>> from pgmpy.factors.discrete import TabularCPD
        >>> from pgmpy.sampling import BayesianModelSampling
        >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
        >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
        >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
        ...                0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
        ...                ['intel', 'diff'], [2, 2])
        >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
        >>> inference = BayesianModelSampling(student)
        >>> inference.forward_sample(size=2, return_type='recarray')
        rec.array([(0, 0, 1), (1, 0, 2)], 
          dtype=[('diff', '<i8'), ('intel', '<i8'), ('grade', '<i8')])
        """
        types = [(var_name, 'int') for var_name in self.topological_order]
        sampled = np.zeros(size, dtype=types).view(np.recarray)

        for node in self.topological_order:
            cpd = self.model.get_cpds(node)
            states = range(self.cardinality[node])
            evidence = cpd.variables[:0:-1]
            if evidence:
                cached_values = self.pre_compute_reduce(variable=node)
                evidence = np.vstack([sampled[i] for i in evidence])
                weights = list(map(lambda t: cached_values[tuple(t)], evidence.T))
            else:
                weights = cpd.values
            sampled[node] = sample_discrete(states, weights, size)

        return _return_samples(return_type, sampled)
Exemplo n.º 7
0
    def sample(self, start_state=None, size=1):
        """
        Sample from the Markov Chain.

        Parameters:
        -----------
        start_state: dict or array-like iterable
            Representing the starting states of the variables. If None is passed, a random start_state is chosen.
        size: int
            Number of samples to be generated.

        Return Type:
        ------------
        pandas.DataFrame

        Examples:
        ---------
        >>> from pgmpy.factors import Factor
        >>> from pgmpy.inference import GibbsSampling
        >>> from pgmpy.models import MarkovModel
        >>> model = MarkovModel([('A', 'B'), ('C', 'B')])
        >>> factor_ab = Factor(['A', 'B'], [2, 2], [1, 2, 3, 4])
        >>> factor_cb = Factor(['C', 'B'], [2, 2], [5, 6, 7, 8])
        >>> model.add_factors(factor_ab, factor_cb)
        >>> gibbs = GibbsSampling(model)
        >>> gibbs.sample(size=4)
           A  B  C
        0  0  1  1
        1  1  0  0
        2  1  1  0
        3  1  1  1
        """
        if start_state is None and self.state is None:
            self.state = self.random_state()
        elif start_state is not None:
            self.set_start_state(start_state)

        sampled = DataFrame(index=range(size), columns=self.variables)
        sampled.loc[0] = [st for var, st in self.state]
        for i in range(size - 1):
            for j, (var, st) in enumerate(self.state):
                other_st = tuple(st for v, st in self.state if var != v)
                next_st = sample_discrete(
                    list(range(self.cardinalities[var])),
                    self.transition_models[var][other_st])[0]
                self.state[j] = State(var, next_st)
            sampled.loc[i + 1] = [st for var, st in self.state]
        return sampled
Exemplo n.º 8
0
    def forward_sample(self, size=1):
        """
        Generates sample(s) from joint distribution of the bayesian network.

        Parameters
        ----------
        size: int
            size of sample to be generated

        Returns
        -------
        sampled: pandas.DataFrame
            the generated samples

        Examples
        --------
        >>> from pgmpy.models.BayesianModel import BayesianModel
        >>> from pgmpy.factors.CPD import TabularCPD
        >>> from pgmpy.inference.Sampling import BayesianModelSampling
        >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
        >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
        >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
        ...                0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
        ...                ['intel', 'diff'], [2, 2])
        >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
        >>> inference = BayesianModelSampling(student)
        >>> inference.forward_sample(2)
                diff       intel       grade
        0        1           0          1
        1        1           0          2
        """
        sampled = DataFrame(index=range(size), columns=self.topological_order)
        for node in self.topological_order:
            cpd = self.model.get_cpds(node)
            states = range(self.cardinality[node])
            evidence = cpd.variables[:0:-1]
            if evidence:
                cached_values = self.pre_compute_reduce(variable=node)
                evidence = sampled.ix[:, evidence].values
                weights = list(map(lambda t: cached_values[tuple(t)],
                                   evidence))
            else:
                weights = cpd.values
            sampled[node] = sample_discrete(states, weights, size)
        return sampled
Exemplo n.º 9
0
    def generate_sample(self, start_state=None, size=1):
        """
        Generator version of self.sample

        Return Type:
        ------------
        List of State namedtuples, representing the assignment to all variables of the model.

        Examples:
        ---------
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> from pgmpy.sampling import GibbsSampling
        >>> from pgmpy.models import MarkovModel
        >>> model = MarkovModel([('A', 'B'), ('C', 'B')])
        >>> factor_ab = DiscreteFactor(['A', 'B'], [2, 2], [1, 2, 3, 4])
        >>> factor_cb = DiscreteFactor(['C', 'B'], [2, 2], [5, 6, 7, 8])
        >>> model.add_factors(factor_ab, factor_cb)
        >>> gibbs = GibbsSampling(model)
        >>> gen = gibbs.generate_sample(size=2)
        >>> [sample for sample in gen]
        [[State(var='C', state=1), State(var='B', state=1), State(var='A', state=0)],
         [State(var='C', state=0), State(var='B', state=1), State(var='A', state=1)]]
        """

        if start_state is None and self.state is None:
            self.state = self.random_state()
        elif start_state is not None:
            self.set_start_state(start_state)

        for i in range(size):
            for j, (var, st) in enumerate(self.state):
                other_st = tuple(st for v, st in self.state if var != v)
                next_st = sample_discrete(list(range(self.cardinalities[var])),
                                          self.transition_models[var][other_st])[0]
                self.state[j] = State(var, next_st)
            yield self.state[:]
Exemplo n.º 10
0
    def likelihood_weighted_sample(self,
                                   evidence=None,
                                   size=1,
                                   return_type="dataframe"):
        """
        Generates weighted sample(s) from joint distribution of the bayesian
        network, that comply with the given evidence.
        'Probabilistic Graphical Model Principles and Techniques', Koller and
        Friedman, Algorithm 12.2 pp 493.

        Parameters
        ----------
        evidence: list of `pgmpy.factor.State` namedtuples
            None if no evidence
        size: int
            size of sample to be generated
        return_type: string (dataframe | recarray)
            Return type for samples, either of 'dataframe' or 'recarray'.
            Defaults to 'dataframe'

        Returns
        -------
        sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
            the generated samples with corresponding weights

        Examples
        --------
        >>> from pgmpy.factors.discrete import State
        >>> from pgmpy.models.BayesianModel import BayesianModel
        >>> from pgmpy.factors.discrete import TabularCPD
        >>> from pgmpy.sampling import BayesianModelSampling
        >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
        >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
        >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
        ...         0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
        ...         ['intel', 'diff'], [2, 2])
        >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
        >>> inference = BayesianModelSampling(student)
        >>> evidence = [State('diff', 0)]
        >>> inference.likelihood_weighted_sample(evidence=evidence, size=2, return_type='recarray')
        rec.array([(0, 0, 1, 0.6), (0, 0, 2, 0.6)], 
          dtype=[('diff', '<i8'), ('intel', '<i8'), ('grade', '<i8'), ('_weight', '<f8')])
        """
        types = [(var_name, 'int') for var_name in self.topological_order]
        types.append(('_weight', 'float'))
        sampled = np.zeros(size, dtype=types).view(np.recarray)
        sampled['_weight'] = np.ones(size)
        evidence_dict = {var: st for var, st in evidence}

        for node in self.topological_order:
            cpd = self.model.get_cpds(node)
            states = range(self.cardinality[node])
            evidence = cpd.get_evidence()

            if evidence:
                evidence_values = np.vstack([sampled[i] for i in evidence])
                cached_values = self.pre_compute_reduce(node)
                weights = list(
                    map(lambda t: cached_values[tuple(t)], evidence_values.T))
                if node in evidence_dict:
                    sampled[node] = evidence_dict[node]
                    for i in range(size):
                        sampled['_weight'][i] *= weights[i][
                            evidence_dict[node]]
                else:
                    sampled[node] = sample_discrete(states, weights)
            else:
                if node in evidence_dict:
                    sampled[node] = evidence_dict[node]
                    for i in range(size):
                        sampled['_weight'][i] *= cpd.values[
                            evidence_dict[node]]
                else:
                    sampled[node] = sample_discrete(states, cpd.values, size)

        return _return_samples(return_type, sampled)
Exemplo n.º 11
0
    def likelihood_weighted_sample(self, evidence=None, size=1, return_type="dataframe"):
        """
        Generates weighted sample(s) from joint distribution of the bayesian
        network, that comply with the given evidence.
        'Probabilistic Graphical Model Principles and Techniques', Koller and
        Friedman, Algorithm 12.2 pp 493.

        Parameters
        ----------
        evidence: list of `pgmpy.factor.State` namedtuples
            None if no evidence
        size: int
            size of sample to be generated
        return_type: string (dataframe | recarray)
            Return type for samples, either of 'dataframe' or 'recarray'.
            Defaults to 'dataframe'

        Returns
        -------
        sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
            the generated samples with corresponding weights

        Examples
        --------
        >>> from pgmpy.factors.discrete import State
        >>> from pgmpy.models.BayesianModel import BayesianModel
        >>> from pgmpy.factors.discrete import TabularCPD
        >>> from pgmpy.sampling import BayesianModelSampling
        >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
        >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
        >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
        ...         0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
        ...         ['intel', 'diff'], [2, 2])
        >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
        >>> inference = BayesianModelSampling(student)
        >>> evidence = [State('diff', 0)]
        >>> inference.likelihood_weighted_sample(evidence=evidence, size=2, return_type='recarray')
        rec.array([(0, 0, 1, 0.6), (0, 0, 2, 0.6)], 
          dtype=[('diff', '<i8'), ('intel', '<i8'), ('grade', '<i8'), ('_weight', '<f8')])
        """
        types = [(var_name, 'int') for var_name in self.topological_order]
        types.append(('_weight', 'float'))
        sampled = np.zeros(size, dtype=types).view(np.recarray)
        sampled['_weight'] = np.ones(size)
        evidence_dict = {var: st for var, st in evidence}

        for node in self.topological_order:
            cpd = self.model.get_cpds(node)
            states = range(self.cardinality[node])
            evidence = cpd.get_evidence()

            if evidence:
                evidence_values = np.vstack([sampled[i] for i in evidence])
                cached_values = self.pre_compute_reduce(node)
                weights = list(map(lambda t: cached_values[tuple(t)], evidence_values.T))
                if node in evidence_dict:
                    sampled[node] = evidence_dict[node]
                    for i in range(size):
                        sampled['_weight'][i] *= weights[i][evidence_dict[node]]
                else:
                    sampled[node] = sample_discrete(states, weights)
            else:
                if node in evidence_dict:
                    sampled[node] = evidence_dict[node]
                    for i in range(size):
                        sampled['_weight'][i] *= cpd.values[evidence_dict[node]]
                else:
                    sampled[node] = sample_discrete(states, cpd.values, size)

        return _return_samples(return_type, sampled)
Exemplo n.º 12
0
    def likelihood_weighted_sample(self, evidence=None, size=1):
        """
        Generates weighted sample(s) from joint distribution of the bayesian
        network, that comply with the given evidence.
        'Probabilistic Graphical Model Principles and Techniques', Koller and
        Friedman, Algorithm 12.2 pp 493.

        Parameters
        ----------
        evidence: list of `pgmpy.factor.State` namedtuples
            None if no evidence
        size: int
            size of sample to be generated

        Returns
        -------
        sampled: pandas.DataFrame
            the generated samples with corresponding weights

        Examples
        --------
        >>> from pgmpy.factors.Factor import State
        >>> from pgmpy.models.BayesianModel import BayesianModel
        >>> from pgmpy.factors.CPD import TabularCPD
        >>> from pgmpy.inference.Sampling import BayesianModelSampling
        >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
        >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
        >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
        ...         0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
        ...         ['intel', 'diff'], [2, 2])
        >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
        >>> inference = BayesianModelSampling(student)
        >>> evidence = {'diff': State(var='diff',state=0)}
        >>> inference.likelihood_weighted_sample(evidence, 2)
                intel       diff       grade  _weight
        0  (intel, 0)  (diff, 0)  (grade, 1)      0.6
        1  (intel, 1)  (diff, 0)  (grade, 1)      0.6
        """
        sampled = DataFrame(index=range(size), columns=self.topological_order)
        sampled['_weight'] = np.ones(size)
        evidence_dict = {var: st for var, st in evidence}
        for node in self.topological_order:
            cpd = self.cpds[node]
            states = [st for var, st in cpd.variables[node]]
            if cpd.evidence:
                indices = [i for i, x in enumerate(self.topological_order) if x in cpd.evidence]
                evidence = sampled.values[:, [indices]].tolist()
                weights = list(map(lambda t: cpd.reduce(t[0], inplace=False).values, evidence))
                if node in evidence_dict:
                    sampled[node] = (State(node, evidence_dict[node]), ) * size
                    for i in range(size):
                        sampled.loc[i, '_weight'] *= weights[i][evidence_dict[node]]
                else:
                    sampled[node] = list(map(lambda t: State(node, t), sample_discrete(states, weights)))
            else:
                if node in evidence_dict:
                    sampled[node] = (State(node, evidence_dict[node]), ) * size
                    for i in range(size):
                        sampled.loc[i, '_weight'] *= cpd.values[evidence_dict[node]]
                else:
                    sampled[node] = list(map(lambda t: State(node, t),
                                         sample_discrete(states, cpd.values, size)))
        return sampled
Exemplo n.º 13
0
    def likelihood_weighted_sample(self, evidence=None, size=1):
        """
        Generates weighted sample(s) from joint distribution of the bayesian
        network, that comply with the given evidence.
        'Probabilistic Graphical Model Principles and Techniques', Koller and
        Friedman, Algorithm 12.2 pp 493.

        Parameters
        ----------
        evidence: list of `pgmpy.factor.State` namedtuples
            None if no evidence
        size: int
            size of sample to be generated

        Returns
        -------
        sampled: pandas.DataFrame
            the generated samples with corresponding weights

        Examples
        --------
        >>> from pgmpy.factors.Factor import State
        >>> from pgmpy.models.BayesianModel import BayesianModel
        >>> from pgmpy.factors.CPD import TabularCPD
        >>> from pgmpy.inference.Sampling import BayesianModelSampling
        >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
        >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
        >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
        ...         0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
        ...         ['intel', 'diff'], [2, 2])
        >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
        >>> inference = BayesianModelSampling(student)
        >>> evidence = [State('diff', 0)]
        >>> inference.likelihood_weighted_sample(evidence, 2)
                intel       diff       grade  _weight
        0         0          0          1        0.6
        1         1          0          1        0.6
        """
        sampled = DataFrame(index=range(size), columns=self.topological_order)
        sampled['_weight'] = np.ones(size)
        evidence_dict = {var: st for var, st in evidence}
        for node in self.topological_order:
            cpd = self.model.get_cpds(node)
            states = range(self.cardinality[node])
            if cpd.evidence:
                evidence = sampled.ix[:, cpd.evidence].values
                cached_values = self.pre_compute_reduce(node)
                weights = list(map(lambda t: cached_values[tuple(t)], evidence))
                if node in evidence_dict:
                    sampled[node] = evidence_dict[node]
                    for i in range(size):
                        sampled.loc[i, '_weight'] *= weights[i][evidence_dict[node]]
                else:
                    sampled[node] = sample_discrete(states, weights)
            else:
                if node in evidence_dict:
                    sampled[node] = evidence_dict[node]
                    for i in range(size):
                        sampled.loc[i, '_weight'] *= cpd.values[evidence_dict[node]]
                else:
                    sampled[node] = sample_discrete(states, cpd.values, size)
        return sampled
    def sample(self,
               evidence=None,
               start_state=None,
               size=1,
               return_type="dataframe"):
        """
        Sample from the Markov Chain.

        Parameters:
        -----------
        start_state: dict or array-like iterable
            Representing the starting states of the variables. If None is passed, a random start_state is chosen.
        evidence: array-like iterable
            Representing states of the evidence variables
        size: int
            Number of samples to be generated.
        return_type: string (dataframe | recarray)
            Return type for samples, either of 'dataframe' or 'recarray'.
            Defaults to 'dataframe'

        Returns
        -------
        sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
            the generated samples

        Examples:
        ---------
        >>> from pgmpy.models.BayesianModel import BayesianModel
        >>> from pgmpy.factors.discrete import TabularCPD
        >>> from GibbsSamplingWithEvidence import GibbsSampling
        >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
        >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
        >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
        ...                0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
        ...                ['intel', 'diff'], [2, 2])
        >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
        >>> gibbs_sampler = GibbsSampling(student)
        >>> samples = gibbs_sampler.sample(size=5, evidence=[('grade',1)])
        >>> print(samples)
        diff  grade  intel
        0     1      1      1
        1     1      1      0
        2     0      1      0
        3     0      1      0
        4     1      1      0

        """
        if start_state is None and self.state is None:
            self.state = self.random_state()
        elif start_state is not None:
            self.set_start_state(start_state)

        #overwriting with evidence
        if evidence is not None:
            for j, (var, st) in enumerate(self.state):
                for key, value in evidence.items():
                    #for k, (v_e, st_e) in enumerate(evidence):
                    if var == key:
                        #print(var, self.state[j])
                        self.state[j] = State(var, value)

        types = [(var_name, 'int') for var_name in self.variables]
        sampled = np.zeros(size, dtype=types).view(np.recarray)
        sampled[0] = tuple([st for var, st in self.state])

        for i in range(size - 1):
            for j, (var, st) in enumerate(self.state):
                # check for evidence
                next_st = None
                if evidence is not None:
                    for v_e, st_e in evidence.items():
                        if var == v_e:
                            next_st = st_e

                if next_st is None:
                    other_st = tuple(st for v, st in self.state if var != v)
                    next_st = sample_discrete(
                        list(range(self.cardinalities[var])),
                        self.transition_models[var][other_st])[0]
                self.state[j] = State(var, next_st)
            sampled[i + 1] = tuple([st for var, st in self.state])

        return _return_samples(return_type, sampled)