Example #1
    def __init__(self, prior, likelihood=None):
        # Normal-Gamma conjugate
        self.prior = prior

        # Normal-Gamma posterior
        self.posterior = copy.deepcopy(prior)

        # Gaussian likelihood
        if likelihood is not None:
            self.likelihood = likelihood
        else:
            mu, lmbdas = self.prior.rvs()
            self.likelihood = GaussianWithDiagonalPrecision(mu=mu,
                                                            lmbdas=lmbdas)
Example #2
    def _expected_log_likelihood(self, x):
        # Natural parameters of the marginal log-distribution
        # are the expected statistics of the posterior
        nat_param = self.expected_statistics()

        # Data statistics under a Gaussian likelihood;
        # the log-partition is subsumed into nat * stats
        likelihood = GaussianWithDiagonalPrecision(
            mu=np.empty_like(nat_param[0]))
        stats = likelihood.statistics(x, vectorize=True)
        log_base = likelihood.log_base()

        return log_base + np.einsum('k,nk->n', nat_param[0], stats[0])\
               + np.einsum('k,nk->n', nat_param[1], stats[1])\
               + np.einsum('k,nk->n', nat_param[2], stats[2])\
               + np.einsum('k,nk->n', nat_param[3], stats[3])
Example #3
    def log_posterior_predictive_gaussian(self, x):
        mu, lmbdas = self.posterior_predictive_gaussian()
        return GaussianWithDiagonalPrecision(mu=mu,
                                             lmbdas=lmbdas).log_likelihood(x)
Example #4
class GaussianWithNormalGamma:
    """
    Multivariate Diagonal Gaussian distribution class.
    Uses a Normal-Gamma prior and posterior
    Parameters are mean and precision matrix:
        mu, lmbdas
    """
    def __init__(self, prior, likelihood=None):
        # Normal-Gamma conjugate
        self.prior = prior

        # Normal-Gamma posterior
        self.posterior = copy.deepcopy(prior)

        # Gaussian likelihood
        if likelihood is not None:
            self.likelihood = likelihood
        else:
            mu, lmbdas = self.prior.rvs()
            self.likelihood = GaussianWithDiagonalPrecision(mu=mu,
                                                            lmbdas=lmbdas)

    def empirical_bayes(self, data):
        self.prior.nat_param = self.likelihood.statistics(data)
        self.likelihood.params = self.prior.rvs()
        return self

    # Max a posteriori
    def max_aposteriori(self, data, weights=None):
        stats = self.likelihood.statistics(data) if weights is None\
            else self.likelihood.weighted_statistics(data, weights)
        self.posterior.nat_param = self.prior.nat_param + stats

        # Note: the mode of the Gamma factor might not exist
        self.likelihood.params = self.posterior.mode()
        return self

    # Gibbs sampling
    def resample(self, data=[]):
        stats = self.likelihood.statistics(data)
        self.posterior.nat_param = self.prior.nat_param + stats

        self.likelihood.params = self.posterior.rvs()
        return self

    # Mean field
    def meanfield_update(self, data, weights=None):
        stats = self.likelihood.statistics(data) if weights is None\
            else self.likelihood.weighted_statistics(data, weights)
        self.posterior.nat_param = self.prior.nat_param + stats

        self.likelihood.params = self.posterior.rvs()
        return self

    def meanfield_sgdstep(self, data, weights, prob, stepsize):
        stats = self.likelihood.statistics(data) if weights is None\
            else self.likelihood.weighted_statistics(data, weights)
        self.posterior.nat_param = (1. - stepsize) * self.posterior.nat_param\
                                   + stepsize * (self.prior.nat_param + 1. / prob * stats)

        self.likelihood.params = self.posterior.rvs()
        return self

    def variational_lowerbound(self):
        q_entropy = self.posterior.entropy()
        qp_cross_entropy = self.prior.cross_entropy(self.posterior)
        return q_entropy - qp_cross_entropy

    def log_marginal_likelihood(self):
        log_partition_prior = self.prior.log_partition()
        log_partition_posterior = self.posterior.log_partition()
        return log_partition_posterior - log_partition_prior

    def posterior_predictive_gaussian(self):
        mu, kappas, alphas, betas = self.posterior.params
        c = 1. + 1. / kappas
        lmbdas = (alphas / betas) * 1. / c
        return mu, lmbdas

    def log_posterior_predictive_gaussian(self, x):
        mu, lmbdas = self.posterior_predictive_gaussian()
        return GaussianWithDiagonalPrecision(mu=mu,
                                             lmbdas=lmbdas).log_likelihood(x)

    def posterior_predictive_studentt(self):
        mu, kappas, alphas, betas = self.posterior.params
        dfs = 2. * alphas
        c = 1. + 1. / kappas
        lmbdas = (alphas / betas) * 1. / c
        return mu, lmbdas, dfs

    def log_posterior_predictive_studentt(self, x):
        mu, lmbdas, dfs = self.posterior_predictive_studentt()
        log_posterior = 0.
        for _x, _mu, _lmbda, _df in zip(x, mu, lmbdas, dfs):
            log_posterior += mvt_logpdf(_x.reshape(-1, 1), _mu.reshape(-1, 1),
                                        _lmbda.reshape(-1, 1, 1), _df)
        return log_posterior
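
A minimal usage sketch for the class above, assuming GaussianWithNormalGamma and the NormalGamma prior from Example #8 are in scope (the exact import path is not shown in these snippets) and that data is an (n, dim) array; the hyperparameter values are illustrative only:

import numpy as np
import numpy.random as npr

dim = 2

# Weak Normal-Gamma prior over (mu, lmbdas); values are illustrative.
prior = NormalGamma(mu=np.zeros((dim, )), kappas=1e-2 * np.ones((dim, )),
                    alphas=np.ones((dim, )), betas=np.ones((dim, )))

model = GaussianWithNormalGamma(prior)

# Synthetic observations, shaped (n, dim).
data = npr.randn(500, dim) + np.array([1., -1.])

# One Gibbs step: update the posterior and resample the likelihood parameters.
model.resample(data)

# Score new points under the Gaussian form of the posterior predictive.
x_new = npr.randn(10, dim)
logp = model.log_posterior_predictive_gaussian(x_new)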
Example #5
import numpy as np
import numpy.random as npr
import matplotlib.pyplot as plt

# Categorical, MixtureOfGaussians, GaussianWithDiagonalCovariance and
# GaussianWithDiagonalPrecision are assumed to be imported from the
# library's own modules (the import path is not shown in this snippet).

npr.seed(1337)

gating = Categorical(K=2)

components = [
    GaussianWithDiagonalCovariance(mu=np.array([1., 1.]),
                                   sigmas=np.array([0.25, 0.5])),
    GaussianWithDiagonalCovariance(mu=np.array([-1., -1.]),
                                   sigmas=np.array([0.5, 0.25]))
]

gmm = MixtureOfGaussians(gating=gating, components=components)

obs, z = gmm.rvs(500)
gmm.plot(obs)

gating = Categorical(K=2)
components = [
    GaussianWithDiagonalPrecision(mu=npr.randn(2, ), lmbdas=np.ones((2, )))
    for _ in range(2)
]

model = MixtureOfGaussians(gating=gating, components=components)

print('Expectation Maximization')
model.max_likelihood(obs, maxiter=500)

plt.figure()
model.plot(obs)
Example #6
    def log_likelihood(self, x):
        mu, lmbdas = x
        return GaussianWithDiagonalPrecision(mu=self.gaussian.mu,
                                             lmbdas=self.kappas * lmbdas).log_likelihood(mu)\
               + self.gamma.log_likelihood(lmbdas)
Example #7
    def __init__(self, mu, kappas, alphas, betas):
        self.gaussian = GaussianWithDiagonalPrecision(mu=mu)
        self.gamma = Gamma(alphas=alphas, betas=betas)
        self.kappas = kappas
Example #8
class NormalGamma(Distribution):
    def __init__(self, mu, kappas, alphas, betas):
        self.gaussian = GaussianWithDiagonalPrecision(mu=mu)
        self.gamma = Gamma(alphas=alphas, betas=betas)
        self.kappas = kappas

    @property
    def dim(self):
        return self.gaussian.dim

    @property
    def params(self):
        return self.gaussian.mu, self.kappas, self.gamma.alphas, self.gamma.betas

    @params.setter
    def params(self, values):
        self.gaussian.mu, self.kappas, self.gamma.alphas, self.gamma.betas = values

    def rvs(self, size=1):
        lmbdas = self.gamma.rvs()
        self.gaussian.lmbdas = self.kappas * lmbdas
        mu = self.gaussian.rvs()
        return mu, lmbdas

    def mean(self):
        return self.gaussian.mean(), self.gamma.mean()

    def mode(self):
        return self.gaussian.mode(), self.gamma.mode()

    def log_likelihood(self, x):
        mu, lmbdas = x
        return GaussianWithDiagonalPrecision(mu=self.gaussian.mu,
                                             lmbdas=self.kappas * lmbdas).log_likelihood(mu)\
               + self.gamma.log_likelihood(lmbdas)

    @property
    def base(self):
        return self.gaussian.base * self.gamma.base

    def log_base(self):
        return np.log(self.base)

    @property
    def nat_param(self):
        return self.std_to_nat(self.params)

    @nat_param.setter
    def nat_param(self, natparam):
        self.params = self.nat_to_std(natparam)

    @staticmethod
    def std_to_nat(params):
        # The definition of the stats differs slightly
        # from the literature to make posterior updates easy

        # Assumed stats
        # stats = [lmbdas * x,
        #          -0.5 * lmbdas * xx,
        #          0.5 * log_lmbdas,
        #          -0.5 * lmbdas]

        mu = params[1] * params[0]
        kappas = params[1]
        alphas = 2. * params[2] - 1.
        betas = 2. * params[3] + params[1] * params[0]**2
        return Stats([mu, kappas, alphas, betas])

    @staticmethod
    def nat_to_std(natparam):
        mu = natparam[0] / natparam[1]
        kappas = natparam[1]
        alphas = 0.5 * (natparam[2] + 1.)
        betas = 0.5 * (natparam[3] - kappas * mu**2)
        return mu, kappas, alphas, betas

    def log_partition(self, params=None):
        mu, kappas, alphas, betas = params if params is not None else self.params
        return -0.5 * np.sum(np.log(kappas)) + Gamma(
            alphas=alphas, betas=betas).log_partition()

    def expected_statistics(self):
        # stats = [lmbdas * x,
        #          -0.5 * lmbdas * xx,
        #          0.5 * log_lmbdas,
        #          -0.5 * lmbdas]

        E_x = self.gamma.alphas / self.gamma.betas * self.gaussian.mu
        E_lmbdas_xx = -0.5 * (1. / self.kappas + self.gaussian.mu * E_x)
        E_log_lmbdas = 0.5 * (digamma(self.gamma.alphas) -
                              np.log(self.gamma.betas))
        E_lmbdas = -0.5 * (self.gamma.alphas / self.gamma.betas)

        return E_x, E_lmbdas_xx, E_log_lmbdas, E_lmbdas

    def entropy(self):
        nat_param, stats = self.nat_param, self.expected_statistics()
        return self.log_partition() - self.log_base()\
               - (np.dot(nat_param[0], stats[0]) + np.dot(nat_param[1], stats[1])
                  + np.dot(nat_param[2], stats[2]) + np.dot(nat_param[3], stats[3]))

    def cross_entropy(self, dist):
        nat_param, stats = dist.nat_param, self.expected_statistics()
        return self.log_partition() - self.log_base()\
               - (np.dot(nat_param[0], stats[0]) + np.dot(nat_param[1], stats[1])
                  + np.dot(nat_param[2], stats[2]) + np.dot(nat_param[3], stats[3]))

    # This implementation is valid but terribly slow
    def _expected_log_likelihood(self, x):
        # Natural parameters of the marginal log-distribution
        # are the expected statistics of the posterior
        nat_param = self.expected_statistics()

        # Data statistics under a Gaussian likelihood;
        # the log-partition is subsumed into nat * stats
        likelihood = GaussianWithDiagonalPrecision(
            mu=np.empty_like(nat_param[0]))
        stats = likelihood.statistics(x, vectorize=True)
        log_base = likelihood.log_base()

        return log_base + np.einsum('k,nk->n', nat_param[0], stats[0])\
               + np.einsum('k,nk->n', nat_param[1], stats[1])\
               + np.einsum('k,nk->n', nat_param[2], stats[2])\
               + np.einsum('k,nk->n', nat_param[3], stats[3])

    def expected_log_likelihood(self, x):
        E_x, E_lmbdas_xx, E_log_lmbdas, E_lmbdas = self.expected_statistics()
        return (x**2).dot(E_lmbdas) + x.dot(E_x)\
               + E_lmbdas_xx.sum() + E_log_lmbdas.sum()\
               - 0.5 * self.dim * np.log(2. * np.pi)
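
As a quick sanity check, std_to_nat and nat_to_std above should be mutual inverses; a small sketch, assuming NormalGamma is in scope and that the Stats container it returns behaves like a list of arrays:

import numpy as np

mu = np.array([0.5, -1.0])
kappas = np.array([2.0, 3.0])
alphas = np.array([4.0, 5.0])
betas = np.array([1.5, 2.5])

# Standard -> natural -> standard should round-trip exactly.
nat = NormalGamma.std_to_nat([mu, kappas, alphas, betas])
mu_r, kappas_r, alphas_r, betas_r = NormalGamma.nat_to_std(nat)

assert np.allclose(mu_r, mu) and np.allclose(kappas_r, kappas)
assert np.allclose(alphas_r, alphas) and np.allclose(betas_r, betas)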