Example #1
    def _train_simple_em_log(self, data, model, maxiter, thresh):
        # Log-likelihood is kept, one value per iteration
        like = N.zeros(maxiter)

        # EM computation, with computation of the likelihood
        g, tgd = model.compute_log_responsabilities(data)
        like[0] = N.sum(densities.logsumexp(tgd), axis=0)
        model.update_em(data, N.exp(g))
        for i in range(1, maxiter):
            g, tgd = model.compute_log_responsabilities(data)
            like[i] = N.sum(densities.logsumexp(tgd), axis=0)
            model.update_em(data, N.exp(g))
            if has_em_converged(like[i], like[i - 1], thresh):
                return like[0:i]
        # Fall through: maxiter reached without convergence
        return like
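The stopping test is delegated to has_em_converged, which is not shown in this snippet. A minimal sketch of such a relative-slope test (an assumption about its behavior, not the library's actual helper):

    import numpy as N

    def has_em_converged(like, plike, thresh):
        # Hypothetical stand-in: declare convergence when the relative
        # change of the log-likelihood between two iterations drops
        # below thresh. The real helper may differ in detail.
        avg = 0.5 * (N.abs(like) + N.abs(plike))
        return N.abs(like - plike) / avg < thresh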
Example #2
    def pdf(self, x, log=False):
        """Computes the pdf of the model at given points.

        :Parameters:
            x : ndarray
                points at which to estimate the pdf. One row per
                multi-dimensional sample (e.g. to estimate the pdf at 100
                different points in 10 dimensions, x's shape should be
                (100, 10)).
            log : bool
                If true, returns the log pdf instead of the pdf.

        :Returns:
            y : ndarray
                the pdf at points x."""
        if log:
            return D.logsumexp(
                D.multiple_gauss_den(x, self.mu, self.va, log=True)
                + N.log(self.w))
        else:
            return N.sum(D.multiple_gauss_den(x, self.mu, self.va) * self.w, 1)
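To make the log branch concrete, here is a self-contained reconstruction of the same computation with scipy (an illustration, not the library's code): for a diagonal-covariance mixture, the weighted logsumexp over component log-densities agrees with the log of the weighted sum of densities computed in the else clause.

    import numpy as N
    from scipy.stats import multivariate_normal
    from scipy.special import logsumexp

    w = N.array([0.3, 0.7])                    # component weights
    mu = N.array([[0.0, 0.0], [3.0, 3.0]])     # one mean per row
    va = N.array([[1.0, 1.0], [0.5, 2.0]])     # diagonal variances
    x = N.random.randn(5, 2)                   # 5 query points in 2-D

    # Per-component log densities, shape (5, 2)
    log_comp = N.column_stack([
        multivariate_normal.logpdf(x, mean=m, cov=N.diag(v))
        for m, v in zip(mu, va)])
    # Log branch: weighted logsumexp over components
    log_pdf = logsumexp(log_comp + N.log(w), axis=1)
    # Linear branch: weighted sum of densities
    pdf = N.sum(N.exp(log_comp) * w, axis=1)
    assert N.allclose(N.exp(log_pdf), pdf)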
Example #3
    def compute_log_responsabilities(self, data):
        """Compute log responsibilities.

        Return normalized and non-normalized responsibilities for the model
        (in the log domain).

        Note
        ----
        Computes the latent variable distribution (a posteriori probability)
        knowing the explicit data for the Gaussian model (w, mu, va):
        gamma(t, i) = P[state = i | observation = data(t); w, mu, va]

        This is basically the E step of EM for finite mixtures."""
        # compute the per-component gaussian log pdf
        tgd = densities.multiple_gauss_den(data, self.gm.mu,
                                           self.gm.va, log=True)
        # add the log weights (multiplication in the linear domain)
        tgd += N.log(self.gm.w)
        # normalize each row to get a (log) posterior distribution
        gd = tgd - densities.logsumexp(tgd)[:, N.newaxis]

        return gd, tgd
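The key step is the row-wise logsumexp subtraction: densities.logsumexp(tgd) apparently reduces over components (hence the [:, N.newaxis] broadcast), so each exponentiated row of gd is a proper posterior. A minimal self-contained check with scipy's logsumexp standing in for that helper:

    import numpy as N
    from scipy.special import logsumexp

    # Toy unnormalized log responsibilities: 4 samples, 3 components
    tgd = N.log(N.random.rand(4, 3))
    # Row-wise normalization, as in the method above
    gd = tgd - logsumexp(tgd, axis=1)[:, N.newaxis]
    # Each exponentiated row now sums to one
    assert N.allclose(N.exp(gd).sum(axis=1), 1.0)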
Example #4
    def train(self, data, model, maxiter=20, thresh=1e-5):
        """Train a model using EM.

        Train a model using data, and stop when the likelihood increase
        between two consecutive iterations falls below a threshold, or when
        the number of iterations exceeds maxiter, whichever comes first.

        :Parameters:
            data : ndarray
                contains the observed features, one row is one frame, i.e.
                one observation of dimension d
            model : GMM
                GMM instance.
            maxiter : int
                maximum number of iterations
            thresh : float
                if the slope of the likelihood falls below this value, the
                algorithm stops.

        :Returns:
            likelihood : ndarray
                one value per iteration.

        Note
        ----
        The model is trained and its parameters updated accordingly, i.e.
        the results are put in the GMM instance.
        """
        mode = model.gm.mode

        # Build regularizer
        if mode == 'diag':
            regularize = curry(regularize_diag,
                               np=self.pcnt,
                               prior=self.pval * N.ones(model.gm.d))
        elif mode == 'full':
            regularize = curry(regularize_full,
                               np=self.pcnt,
                               prior=self.pval * N.eye(model.gm.d))
        else:
            raise ValueError("unknown variance mode")

        model.init(data)
        regularize(model.gm.va)

        # Likelihood is kept
        like = N.empty(maxiter, N.float64)

        # Em computation, with computation of the likelihood
        g, tgd = model.compute_log_responsabilities(data)
        g = N.exp(g)
        model.update_em(data, g)
        regularize(model.gm.va)

        like[0] = N.sum(densities.logsumexp(tgd), axis=0)
        for i in range(1, maxiter):
            g, tgd = model.compute_log_responsabilities(data)
            g = N.exp(g)
            model.update_em(data, g)
            regularize(model.gm.va)

            like[i] = N.sum(densities.logsumexp(tgd), axis=0)
            if has_em_converged(like[i], like[i - 1], thresh):
                return like[0:i]
        return like
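curry is used here as a partial-application helper, binding the pseudo-count and prior once so the regularizer can later be called with only the current variances; functools.partial plays the same role. The regularizers themselves are not shown; the sketch below is a plausible MAP-style shrinkage toward the prior, offered purely as an illustration (the actual regularize_diag may differ):

    from functools import partial
    import numpy as N

    def regularize_diag(va, np, prior):
        # Hypothetical stand-in: shrink each component's diagonal
        # variances toward the prior with pseudo-count np, in place.
        va += np * prior
        va /= 1.0 + np

    # Bind the prior once, then call with the variances, mirroring curry
    regularize = partial(regularize_diag, np=0.1, prior=N.ones(2))
    va = N.ones((3, 2))      # 3 diagonal components in 2-D
    regularize(va)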