def _train_simple_em_log(self, data, model, maxiter, thresh):
    # Likelihood is kept
    like = N.zeros(maxiter)

    # EM computation, with computation of the likelihood
    g, tgd = model.compute_log_responsabilities(data)
    like[0] = N.sum(densities.logsumexp(tgd), axis=0)
    model.update_em(data, N.exp(g))
    for i in range(1, maxiter):
        g, tgd = model.compute_log_responsabilities(data)
        like[i] = N.sum(densities.logsumexp(tgd), axis=0)
        model.update_em(data, N.exp(g))
        if has_em_converged(like[i], like[i - 1], thresh):
            return like[0:i]
    # maxiter reached without convergence: return every iteration's likelihood
    return like
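# The trainers in this section stop as soon as has_em_converged() reports that
# the log-likelihood has stopped improving.  That helper is not shown here; the
# sketch below is an assumption based only on how it is called
# (has_em_converged(like[i], like[i - 1], thresh)): a relative-change test on
# two consecutive log-likelihood values.  The real helper may differ.
import numpy as N

def has_em_converged(like, plike, thresh):
    # Sketch only: treat EM as converged when the relative change of the
    # log-likelihood between two consecutive iterations drops below thresh.
    diff = N.abs(like - plike)
    avg = 0.5 * (N.abs(like) + N.abs(plike))
    return bool(diff <= thresh * avg)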
def pdf(self, x, log=False):
    """Computes the pdf of the model at given points.

    :Parameters:
        x : ndarray
            points where to estimate the pdf. One row for one
            multi-dimensional sample (eg to estimate the pdf at 100
            different points in 10 dimensions, data's shape should be
            (100, 10)).
        log : bool
            If true, returns the log pdf instead of the pdf.

    :Returns:
        y : ndarray
            the pdf at points x."""
    if log:
        return D.logsumexp(
                D.multiple_gauss_den(x, self.mu, self.va, log=True)
                + N.log(self.w))
    else:
        return N.sum(D.multiple_gauss_den(x, self.mu, self.va) * self.w, 1)
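# pdf() above assembles the mixture density per component: in the linear
# domain it is a weighted sum of Gaussian densities, in the log domain the log
# weights are added and logsumexp() collapses the component axis.  The
# standalone numpy sketch below reproduces that computation for a
# diagonal-covariance mixture; log_gauss_den and mixture_logpdf are
# illustrative names, not part of this module.
import numpy as N

def log_gauss_den(x, mu, va):
    # Log density of a single diagonal Gaussian, one row of x per sample.
    d = x.shape[1]
    return -0.5 * (d * N.log(2 * N.pi) + N.sum(N.log(va))
                   + N.sum((x - mu) ** 2 / va, axis=1))

def mixture_logpdf(x, w, mu, va):
    # Per-component log densities, shape (n samples, k components).
    ltg = N.column_stack([log_gauss_den(x, mu[i], va[i])
                          for i in range(len(w))])
    ltg += N.log(w)                         # add the log mixture weights
    m = ltg.max(axis=1, keepdims=True)      # logsumexp over components
    return (m + N.log(N.exp(ltg - m).sum(axis=1, keepdims=True))).ravel()

x = N.random.randn(5, 2)
w = N.array([0.4, 0.6])
mu = N.array([[0., 0.], [3., 3.]])
va = N.array([[1., 1.], [2., 0.5]])
print(mixture_logpdf(x, w, mu, va))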
def compute_log_responsabilities(self, data):
    """Compute log responsabilities.

    Return normalized and non-normalized responsabilities for the model
    (in the log domain).

    Note
    ----
    Computes the latent variable distribution (a posteriori probability)
    knowing the explicit data for the Gaussian model (w, mu, var):

        gamma(t, i) = P[state = i | observation = data(t); w, mu, va]

    This is basically the E step of EM for finite mixtures."""
    # compute the gaussian pdf
    tgd = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va,
                                       log=True)
    # multiply by the weight
    tgd += N.log(self.gm.w)
    # Normalize to get a (log) pdf
    gd = tgd - densities.logsumexp(tgd)[:, N.newaxis]

    return gd, tgd
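# compute_log_responsabilities() returns both the weighted per-component log
# densities tgd and their row-normalized version gd.  Exponentiating gd gives
# responsibilities that sum to one over the components, and the row-wise
# logsumexp of tgd is exactly the per-sample log-likelihood accumulated in
# `like` by the trainers in this section.  The standalone check below uses a
# local logsumexp rather than the module's densities helpers.
import numpy as N

def logsumexp(a, axis=1):
    # Numerically stable log(sum(exp(a))) along the component axis.
    m = a.max(axis=axis, keepdims=True)
    return N.squeeze(m + N.log(N.exp(a - m).sum(axis=axis, keepdims=True)),
                     axis=axis)

# tgd: made-up weighted per-component log densities, shape (2 samples, 2 components)
tgd = N.log(N.array([[0.2, 0.1], [0.05, 0.4]]))
gd = tgd - logsumexp(tgd)[:, N.newaxis]     # normalize in the log domain

print(N.exp(gd).sum(axis=1))    # -> [1. 1.], responsibilities sum to one
print(logsumexp(tgd).sum())     # total log-likelihood, as summed into `like`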
def train(self, data, model, maxiter=20, thresh=1e-5):
    """Train a model using EM.

    Train a model using data, and stop when the likelihood increase
    between two consecutive iterations falls below a threshold, or when
    the number of iterations exceeds maxiter, whichever comes first.

    :Parameters:
        data : ndarray
            contains the observed features, one row is one frame, ie one
            observation of dimension d
        model : GMM
            GMM instance.
        maxiter : int
            maximum number of iterations
        thresh : float
            if the slope of the likelihood falls below this value, the
            algorithm stops.

    :Returns:
        likelihood : ndarray
            one value per iteration.

    Note
    ----
    The model is trained, and its parameters updated accordingly, ie the
    results are put in the GMM instance.
    """
    mode = model.gm.mode

    # Build regularizer
    if mode == 'diag':
        regularize = curry(regularize_diag, np=self.pcnt,
                           prior=self.pval * N.ones(model.gm.d))
    elif mode == 'full':
        regularize = curry(regularize_full, np=self.pcnt,
                           prior=self.pval * N.eye(model.gm.d))
    else:
        raise ValueError("unknown variance mode")

    model.init(data)
    regularize(model.gm.va)

    # Likelihood is kept
    like = N.empty(maxiter, N.float64)

    # EM computation, with computation of the likelihood
    g, tgd = model.compute_log_responsabilities(data)
    g = N.exp(g)
    model.update_em(data, g)
    regularize(model.gm.va)
    like[0] = N.sum(densities.logsumexp(tgd), axis=0)
    for i in range(1, maxiter):
        g, tgd = model.compute_log_responsabilities(data)
        g = N.exp(g)
        model.update_em(data, g)
        regularize(model.gm.va)

        like[i] = N.sum(densities.logsumexp(tgd), axis=0)
        if has_em_converged(like[i], like[i - 1], thresh):
            return like[0:i]
    # maxiter reached without convergence: return every iteration's likelihood
    return like
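# The regularized trainer above relies on curry(), regularize_diag() and
# regularize_full(), none of which appear in this section.  As an assumption
# only: curry behaves like partial application (functools.partial gives the
# behaviour the calls above need), and the regularizers shrink each
# component's covariance toward a small prior, in place, so that no component
# can collapse onto a single point.  The diagonal-mode sketch below is
# hypothetical, not the module's actual implementation.
import functools
import numpy as N

curry = functools.partial   # assumed equivalent for the calls above

def regularize_diag(va, np=0.0, prior=None):
    # Hypothetical sketch: blend each row of diagonal variances with `prior`
    # using the pseudo-count weight `np`, modifying va in place.
    for i in range(va.shape[0]):
        va[i] = (va[i] + np * prior) / (1.0 + np)

va = N.array([[1e-8, 2.0], [0.5, 0.3]])        # one row per component
regularize = curry(regularize_diag, np=0.1, prior=0.01 * N.ones(2))
regularize(va)                                 # keeps variances away from zero
print(va)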