def _m_step(self, Y, log_resp):
    """Stochastic M step: refit the mixture from a drawn assignment.

    Parameters
    ----------
    Y : array-like, shape (n_samples, n_features)

    log_resp : array-like, shape (n_samples, n_components)
        Logarithm of the posterior probabilities (or responsibilities) of
        the point of each sample in Y. This implementation does not read
        it; the update is driven by ``self._draw_conditionnal_Z(Y)``.
    """
    # Threshold condition: keep redrawing until ``self.threshold`` accepts
    # the draw.
    # NOTE(review): there is no retry cap, so this loops forever if no draw
    # is ever accepted.
    drawn = self._draw_conditionnal_Z(Y)
    while True:
        if self.threshold(drawn, Y.shape[1]):
            break
        drawn = self._draw_conditionnal_Z(Y)
        print("Ajustement au seuil")

    n_samples, _ = Y.shape
    counts, means, covariances = _estimate_gaussian_parameters(
        Y, drawn, self.reg_covar, self.covariance_type)

    # The first value returned by the estimator is divided by n_samples to
    # obtain the mixing weights.
    counts /= n_samples
    self.weights_ = counts
    self.means_ = means
    self.covariances_ = covariances
    self.precisions_cholesky_ = _compute_precision_cholesky(
        covariances, self.covariance_type)

    self._m_step_callback(Y)
def _onehot_to_initialparams(self, X, onehot, cov_type):
    """
    Derive mixture weights, means and precisions from a hard clustering.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row corresponds
        to a single data point.

    onehot : ndarray, shape (n_samples, n_clusters)
        Each row has a 1 indicating cluster membership, other entries
        are 0.

    cov_type : {'full', 'tied', 'diag', 'spherical'}
        Covariance type for Gaussian mixture model

    Returns
    -------
    weights, means, precisions : tuple
        ``weights`` is the first estimator output divided by the number of
        rows of X. The form of ``precisions`` depends on ``cov_type``: a
        single ``c . c^T`` product for 'tied', the precision Cholesky
        factor itself for 'diag', and otherwise a list with one
        ``c . c^T`` product per entry of the Cholesky factor.
    """
    def _self_product(factor):
        # factor times its own transpose
        return np.dot(factor, factor.T)

    n_points = X.shape[0]
    weights, means, covariances = _estimate_gaussian_parameters(
        X, onehot, 1e-06, cov_type)
    weights /= n_points

    chol = _compute_precision_cholesky(covariances, cov_type)

    if cov_type == "diag":
        # NOTE(review): the Cholesky factor is returned unsquared here,
        # unlike the other branches -- confirm this matches what the
        # consumer of these precisions expects.
        precisions = chol
    elif cov_type == "tied":
        precisions = _self_product(chol)
    else:
        precisions = [_self_product(factor) for factor in chol]

    return weights, means, precisions
def _m_step(self, X, log_resp):
    """M step with bounded means and covariances.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)

    log_resp : array-like, shape (n_samples, n_components)
        Logarithm of the posterior probabilities (or responsibilities) of
        the point of each sample in X.

    Notes
    -----
    ``action_bound`` and ``sigma_bound`` are not parameters of this method;
    they are looked up in the enclosing scope.
    """
    n_samples, _ = X.shape
    responsibilities = np.exp(log_resp)
    counts, raw_means, raw_covariances = _estimate_gaussian_parameters(
        X, responsibilities, self.reg_covar, self.covariance_type)

    # Bound the estimates: means to [-action_bound, action_bound] and every
    # covariance entry to [exp(-2 * sigma_bound), exp(2 * sigma_bound)].
    self.means_ = np.clip(raw_means, -action_bound, action_bound)
    lowest = np.exp(-2 * sigma_bound)
    highest = np.exp(2 * sigma_bound)
    self.covariances_ = np.clip(raw_covariances, lowest, highest)

    counts /= n_samples
    self.weights_ = counts

    # The precision factor is derived from the clipped covariances.
    self.precisions_cholesky_ = _compute_precision_cholesky(
        self.covariances_, self.covariance_type)
def _initialize(self, X, resp):
    """Initialise the Gaussian mixture parameters in the subspace.

    The estimates are computed on ``self._estimate_subspace_repr(X)``
    (stored as ``self.y_sub``), not on X itself. User-supplied
    ``weights_init``, ``means_init`` and ``precisions_init`` take
    precedence over the estimated values when they are not None.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)

    resp : array-like, shape (n_samples, n_components)
    """
    # TODO: Initialize A * A.T
    n_samples, _ = X.shape

    self.y_sub = self._estimate_subspace_repr(X)
    est_weights, est_means, est_covariances = _estimate_gaussian_parameters(
        self.y_sub, resp, self.reg_covar, self.covariance_type)
    est_weights /= n_samples

    self.weights_ = (self.weights_init if self.weights_init is not None
                     else est_weights)
    self.means_ = (self.means_init if self.means_init is not None
                   else est_means)

    if self.precisions_init is None:
        # No user precisions: keep the estimated covariances and derive
        # the precision Cholesky factor from them.
        self.covariances_ = est_covariances
        self.precisions_cholesky_ = _compute_precision_cholesky(
            est_covariances, self.covariance_type)
        return

    # User precisions supplied: ``covariances_`` is not assigned on this
    # path.
    if self.covariance_type == 'full':
        factors = [linalg.cholesky(precision, lower=True)
                   for precision in self.precisions_init]
        self.precisions_cholesky_ = np.array(factors)
    elif self.covariance_type == 'tied':
        self.precisions_cholesky_ = linalg.cholesky(self.precisions_init,
                                                    lower=True)
    else:
        self.precisions_cholesky_ = self.precisions_init
def initialize_params(data, one_hot, cov):
    """
    Compute mixture parameters from a class-membership matrix.

    sklearn's Gaussian Mixture does not allow initialization from class
    membership, but it does allow initialization from mixture parameters,
    so the mixture parameters are calculated here from the membership.

    input:
        data - nxd numpy array
        one_hot - numpy array with one row per data point and a single one
            in each row indicating cluster membership
        cov - covariance type, forwarded to the estimator; 'tied' and
            'diag' are handled specially, every other value takes the
            per-component path
    output:
        weights - array of mixing weights, one per cluster
        means - array of means of the mixture components
        precisions - precision matrices, format depends on the EM
            clustering option (eg 'full' mode needs a list of matrices,
            one for each mixture component, but 'tied' mode only needs a
            single matrix, since all precisions are constrained to be
            equal)
    """
    n_rows = data.shape[0]
    weights, means, covariances = _estimate_gaussian_parameters(
        data, one_hot, 1e-06, cov
    )
    weights /= n_rows

    chol = _compute_precision_cholesky(covariances, cov)

    if cov == "tied":
        # A single shared factor gives a single precision matrix.
        return weights, means, np.dot(chol, chol.T)

    if cov == "diag":
        # NOTE(review): the factor is returned as-is (not squared) --
        # confirm this is what the consumer expects.
        return weights, means, chol

    per_component = [np.dot(factor, factor.T) for factor in chol]
    return weights, means, per_component
def test__estimate_gaussian_parameters_diagonal_no_compression(self):
    """Check _estimate_gaussian_parameters against sklearn's implementation.

    Diagonal covariances, no compression.
    """
    cov_type = 'diag'
    reg_covar = 1e-6
    model = GaussianMixture(n_components=3, num_feat_full=5,
                            num_feat_comp=5, num_feat_shared=5, num_samp=4,
                            transform=None, mask=None, D_indices=None,
                            covariance_type=cov_type, reg_covar=reg_covar)
    model.fit_sparsifier(X=self.td.X)

    # The same random responsibilities feed both implementations.
    resp = np.random.rand(model.num_samp, model.n_components)

    got_weights, got_means, got_covariances = (
        model._estimate_gaussian_parameters(resp, cov_type))

    # skl reference; it returns counts instead of weights, so normalise.
    ref_counts, ref_means, ref_covariances = _estimate_gaussian_parameters(
        self.td.X, resp, reg_covar, cov_type)
    ref_weights = ref_counts / model.num_samp

    comparisons = (
        (got_weights, ref_weights),
        (got_means, ref_means),
        (got_covariances, ref_covariances),
    )
    for got, reference in comparisons:
        self.assertArrayEqual(got, reference)
def _m_step(self, Y, log_resp):
    """M step blending a responsibility-based and a draw-based update.

    Two parameter sets are estimated, one from ``exp(log_resp)`` and one
    from a drawn assignment, and combined as
    ``(1 - g) * EM + g * SEM`` with ``g = self.gamma(self.current_iter)``.

    Parameters
    ----------
    Y : array-like, shape (n_samples, n_features)

    log_resp : array-like, shape (n_samples, n_components)
        Logarithm of the posterior probabilities (or responsibilities) of
        the point of each sample in Y.
    """
    # Threshold condition: redraw at most 10 times; the last draw is used
    # even if it was never accepted by ``self.threshold``.
    drawn = self._draw_conditionnal_Z(Y)
    for _ in range(10):
        if self.threshold(drawn, Y.shape[1]):
            break
        drawn = self._draw_conditionnal_Z(Y)
        print("Ajustement au seuil")

    n_samples, _ = Y.shape

    # Estimate from the drawn assignment.
    sem_weights, sem_means, sem_covariances = _estimate_gaussian_parameters(
        Y, drawn, self.reg_covar, self.covariance_type)
    sem_weights /= n_samples

    # Estimate from the responsibilities.
    em_weights, em_means, em_covariances = _estimate_gaussian_parameters(
        Y, np.exp(log_resp), self.reg_covar, self.covariance_type)
    em_weights /= n_samples

    mix = self.gamma(self.current_iter)
    keep = 1 - mix

    self.means_ = keep * em_means + mix * sem_means
    self.weights_ = keep * em_weights + mix * sem_weights
    self.covariances_ = keep * em_covariances + mix * sem_covariances

    self.precisions_cholesky_ = _compute_precision_cholesky(
        self.covariances_, self.covariance_type)

    self._m_step_callback(Y)
def _m_step(self, X, log_resp):
    """M step with a fused-lasso update of the component means.

    Weights, means and covariances are first estimated from
    ``exp(log_resp)``; the estimated means are kept in ``self.mu``. Then
    ``self.means_`` is rebuilt one component at a time: a component with
    enough hard-assigned samples gets a fused-lasso based mean, any other
    component falls back to its row of ``self.mu``.

    NOTE(review): this block uses Python 2 ``print`` statements and calls
    R functions through ``r`` (presumably rpy2's ``robjects.r`` -- confirm).
    It also reads ``self.Y``, which is not set in this method.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)

    log_resp : array-like, shape (n_samples, n_components)
        Logarithm of the posterior probabilities (or responsibilities) of
        the point of each sample in X.
    """
    n_samples, n_features = X.shape
    # The estimated means go to ``self.mu``; ``self.means_`` is filled in
    # by the loop below.
    self.weights_, self.mu, self.covariances_ = (
        _estimate_gaussian_parameters(X, np.exp(log_resp), self.reg_covar,
                                      self.covariance_type))
    # update lasso coefficient
    print "*************updata means by fused lasso now*****************"
    r_ic = np.exp(log_resp)
    for i in range(self.n_components):
        # Rows whose largest responsibility belongs to component i.
        idx = np.where(np.argmax(r_ic,axis=1) == i)
        print "len(idx):", len(idx[0])
        # ensure it can be fitted by fused lasso: the component must hold
        # more than n_samples / (2 * n_components) samples
        if len(idx[0])>(n_samples/(2*self.n_components)):
            print "fused lasso used"
            # Hand the component's rows of X and self.Y over to R.
            data_X_i = r.matrix(X[idx[0]], nrow = len(idx[0]), ncol = n_features)
            data_Y_i = r.matrix(self.Y[idx[0]],nrow = len(idx[0]), ncol = 1)
            n = r.nrow(data_X_i)
            p = r.ncol(data_X_i)
            print "lasso_n:",n
            print "lasso_p:",p
            results = r.fusedlasso1d(y=data_Y_i, X=data_X_i)
            # Coefficients requested at sqrt(n * log(p)); the last column
            # of the first element of the returned object is kept.
            result = np.array(r.coef(results, np.sqrt(n*np.log(p)))[0])[:,-1]
            # Element-wise product of the coefficients with the mean of the
            # component's rows taken along axis 0.
            mu_i = np.multiply(result,np.mean(data_X_i,axis=0))
            # The first component starts ``self.means_``; later components
            # are stacked underneath as new rows.
            if i == 0:
                self.means_ = mu_i
            else:
                self.means_ = np.vstack((self.means_, mu_i))
        else:
            print "not enough data for fused lasso"
            # Fall back to the estimated mean of this component.
            if i == 0:
                self.means_ = self.mu[i]
            else:
                self.means_ = np.vstack((self.means_,self.mu[i]))
    # The first estimator output is divided by n_samples to give weights.
    self.weights_ /= n_samples
    self.precisions_cholesky_ = _compute_precision_cholesky(
        self.covariances_, self.covariance_type)
def _m_step(self, X, resp):
    """M step.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)

    resp : array-like, shape (n_samples, n_components)
        Per-sample, per-component weighting. It is forwarded unchanged to
        ``_estimate_gaussian_parameters``; no exponentiation is applied
        here, so it is used as given rather than as a logarithm.
    """
    n_samples, _ = X.shape

    counts, means, covariances = _estimate_gaussian_parameters(
        X, resp, self.reg_covar, self.covariance_type)

    # The first estimator output is divided by n_samples to give weights.
    counts /= n_samples

    self.weights_ = counts
    self.means_ = means
    self.covariances_ = covariances
    self.precisions_cholesky_ = _compute_precision_cholesky(
        covariances, self.covariance_type)
def _initialize(self, X, resp):
    """Initialise the Gaussian mixture parameters with bounded estimates.

    Estimated means and covariances are clipped before use. Supplying
    ``precisions_init`` is effectively unsupported: the precision factor is
    still assigned, but a ValueError is raised immediately afterwards.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)

    resp : array-like, shape (n_samples, n_components)

    Raises
    ------
    ValueError
        Whenever ``self.precisions_init`` is not None.

    Notes
    -----
    ``action_bound`` and ``sigma_bound`` are not parameters of this method;
    they are looked up in the enclosing scope.
    """
    n_samples, _ = X.shape

    est_weights, est_means, est_covariances = _estimate_gaussian_parameters(
        X, resp, self.reg_covar, self.covariance_type)

    # Bound the estimates: means to [-action_bound, action_bound] and every
    # covariance entry to [exp(-2 * sigma_bound), exp(2 * sigma_bound)].
    est_means = np.clip(est_means, -action_bound, action_bound)
    lowest = np.exp(-2 * sigma_bound)
    highest = np.exp(2 * sigma_bound)
    est_covariances = np.clip(est_covariances, lowest, highest)

    est_weights /= n_samples

    # User-supplied initial values win over the (clipped) estimates.
    self.weights_ = (self.weights_init if self.weights_init is not None
                     else est_weights)
    self.means_ = (self.means_init if self.means_init is not None
                   else est_means)

    if self.precisions_init is None:
        self.covariances_ = est_covariances
        self.precisions_cholesky_ = _compute_precision_cholesky(
            est_covariances, self.covariance_type)
        return

    # precisions_init was given: the factor is stored, then every branch
    # ends in the same ValueError.
    if self.covariance_type == 'full':
        factors = [linalg.cholesky(precision, lower=True)
                   for precision in self.precisions_init]
        self.precisions_cholesky_ = np.array(factors)
    elif self.covariance_type == 'tied':
        self.precisions_cholesky_ = linalg.cholesky(self.precisions_init,
                                                    lower=True)
    else:
        self.precisions_cholesky_ = self.precisions_init
    raise ValueError
def _gmm_maximization(self, T, Y, W, resp):
    """Maximisation step on the joint data, converted to GLLiM parameters.

    T, W and Y are concatenated column-wise (in that order), Gaussian
    parameters are estimated on the joint array with full covariances, and
    the result is converted with ``jGLLiM.GMM_to_GLLiM``.

    Parameters
    ----------
    T, W, Y : array-like
        Arrays with one row per sample; they are concatenated along
        axis 1, so they must have the same number of rows.

    resp : array-like
        Weighting passed to ``_estimate_gaussian_parameters`` together
        with the joint array.

    Returns
    -------
    tuple
        ``(pi, c_T, Gamma_T, A, b, Sigma)`` where ``c_T`` is the first
        ``self.Lt`` columns of ``c`` and ``Gamma_T`` the leading
        ``self.Lt`` x ``self.Lt`` block of each ``Gamma``. When
        ``self.sigma_type == 'iso'``, ``Sigma`` is reduced to the
        ``[0, 0]`` entry of each matrix.
    """
    n_samples = T.shape[0]
    joint = np.concatenate((T, W, Y), axis=1)

    # NOTE(review): the estimate is always requested as "full", even for
    # sigma_type == 'iso', while get_full_covariances below is told the
    # input is 'spherical' -- confirm that helper accepts this input.
    card_class, m, V = _estimate_gaussian_parameters(
        joint, resp, self.reg_covar, "full")
    pi = card_class / n_samples

    is_iso = self.sigma_type == "iso"
    if is_iso:
        V = get_full_covariances(V, 'spherical', self.K, self.D + self.L)

    dic = jGLLiM.GMM_to_GLLiM(pi, m, V, self.L)
    pi, c, Gamma, A, b, Sigma = (
        dic["pi"], dic["c"], dic["Gamma"], dic["A"], dic["b"], dic["Sigma"])

    if is_iso:
        # Keep a single scalar per component.
        Sigma = np.array([s[0, 0] for s in Sigma])

    # Restrict c and Gamma to their first Lt dimensions.
    ckList_T = c[:, :self.Lt]
    GammakList_T = Gamma[:, :self.Lt, :self.Lt]

    return pi, ckList_T, GammakList_T, A, b, Sigma