def gmm_em(dataList, nmix, final_niter, ds_factor):
    """Train a diagonal-covariance GMM with EM and successive mixture splitting.

    Starting from one component, the model is fitted on all frames, its
    variances are floored, and ``mixup`` is applied before the component
    counter is doubled, until the counter exceeds ``nmix``. Progress and the
    mean log-likelihood of each pass are printed.

    Args:
        dataList: Forwarded to ``load_data``. The loaded arrays are
            concatenated along axis 1 and transposed before fitting
            (assumes each array is (n_features, n_frames) -- TODO confirm).
        nmix: Target number of components; sizes 1, 2, 4, ... are visited
            while the size is <= ``nmix``.
        final_niter: Currently unused. It is only referenced by the disabled
            iteration schedule noted in the body.
        ds_factor: Currently has no effect. It is reset to 1 for the later
            mixture sizes but never read.

    Returns:
        None. NOTE(review): the trained model is not returned or stored by
        this function -- confirm whether callers need it.
    """
    dataList = load_data(dataList)
    # The global mean/variance are not consumed below; the call is kept in
    # case it validates the data or has side effects.
    gm, gv = comp_gm_gv(dataList)

    # One EM pass per mixture size. The table is sized from ``nmix`` because a
    # fixed 10-entry table raises IndexError once nmix reaches 1024.
    # Disabled schedule, kept for reference:
    #   niter = [1, 2, 4, 4, 4, 4, 6, 6, 10, 10, 15]
    #   niter[int(np.log2(nmix))] = final_niter
    n_levels = max(int(nmix).bit_length(), 1)
    niter = np.ones(n_levels, dtype=np.int32)

    # covariance_type is keyword-only in current scikit-learn releases.
    model = GaussianMixture(1, covariance_type='diag', verbose=0, max_iter=100)
    data = np.concatenate(dataList, axis=1).T

    mix = 1
    while mix <= nmix:
        if mix >= nmix // 2:
            ds_factor = 1
        print('\nRe-estimating the GMM hyperparameters for %d components ...' % mix)
        for i in range(niter[int(np.log2(mix))]):
            print('EM iter#: %d \t' % i, end='')
            model.fit(data)
            sigma = apply_var_floors(model.weights_, model.covariances_, 1)
            # Push the floored variances into every attribute the estimator
            # reads: scoring uses ``precisions_cholesky_``, not ``precisions_``.
            model.covariances_ = sigma
            model.precisions_ = 1 / sigma
            model.precisions_cholesky_ = 1 / np.sqrt(sigma)
            # Legacy (misspelled) attribute, still set in case other project
            # code such as ``mixup`` reads it.
            model.covariances = sigma
            llk, _ = model._estimate_log_prob_resp(data)
            print('[llk = %.2f]' % np.mean(llk))
        if mix < nmix:
            model = mixup(model)
        mix *= 2
def test_log_responsibilities(self):
    """
    Check the log responsibilities against the values Sklearn computes.
    """
    num_samples, num_components, num_dims = 16384, 2048, 128

    # Random spherical mixture and data; the draw order is significant for
    # the random number generator, so it is kept fixed.
    means = torch.randn(num_components, num_dims)
    covs = torch.rand(num_components)
    x = torch.randn(num_samples, num_dims)
    prior = torch.rand(num_components)
    prior /= prior.sum()

    # Load the same parameters into an unfitted Sklearn mixture so that it
    # can serve as the reference implementation.
    reference = GaussianMixture(num_components, covariance_type='spherical')
    reference.means_ = means.numpy()
    reference.precisions_cholesky_ = np.sqrt(1 / covs.numpy())
    reference.weights_ = prior.numpy()

    # pylint: disable=protected-access
    reference_output = reference._estimate_log_prob_resp(x.numpy())
    expected = torch.from_numpy(reference_output[1])

    log_probs = log_normal(x, means, covs, 'spherical')
    predicted = log_responsibilities(log_probs, prior)

    is_close = torch.allclose(expected, predicted, atol=1e-03, rtol=1e-05)
    self.assertTrue(is_close)
def test__estimate_log_prob_resp_spherical_shared_compression(self):
    """Compare ``_estimate_log_prob_resp`` with sklearn for spherical covariances.

    The model is built with 5 full features, 3 compressed features and
    3 shared features; the sklearn reference is evaluated on ``gmm.RHDX``.
    """
    cov_type = 'spherical'
    rng = np.random.RandomState(11)

    gmm = GaussianMixture(
        n_components=3,
        num_feat_full=5,
        num_feat_comp=3,
        num_feat_shared=3,
        num_samp=4,
        transform=None,
        mask=None,
        D_indices=None,
        covariance_type=cov_type,
        random_state=rng,
    )
    gmm.fit_sparsifier(X=self.td.X)

    # Random mixture parameters, drawn from the same seeded generator.
    means = rng.rand(gmm.n_components, gmm.num_feat_full)
    covariances = rng.rand(gmm.n_components)
    weights = rng.rand(gmm.n_components)
    weights /= weights.sum()

    estimated = gmm._estimate_log_prob_resp(weights, means, covariances,
                                            cov_type)
    log_prob_test, log_resp_test, log_prob_norm_test = estimated

    # Build the sklearn reference by hand (there is no tidier way to do it).
    precisions = _compute_precision_cholesky(covariances, cov_type)
    reference = GMSKL(n_components=3, covariance_type=cov_type)
    # The mask must be shared so that mask[0] is valid for every mean.
    reference.means_ = means[:, gmm.mask[0]]
    reference.precisions_cholesky_ = precisions
    reference.weights_ = weights
    reference.covariance_type_ = cov_type

    per_sample_norm, log_resp_true = reference._estimate_log_prob_resp(
        gmm.RHDX)
    # NOTE: the reference values are collapsed to their mean before the
    # comparison; if something looks wrong later, this step is a suspect.
    log_prob_norm_true = per_sample_norm.mean()

    # The log probabilities come from a separate sklearn helper.
    log_prob_true = _estimate_log_gaussian_prob(gmm.RHDX, reference.means_,
                                                precisions, cov_type)

    # Argument order of each comparison is kept exactly as before.
    comparisons = (
        (log_prob_test, log_prob_true),
        (log_prob_norm_true, log_prob_norm_test),
        (log_resp_true, log_resp_test),
    )
    for first, second in comparisons:
        self.assertArrayEqual(first, second)
def test__estimate_log_prob_resp_diagonal_no_compression(self):
    """Compare ``_estimate_log_prob_resp`` with sklearn for diagonal covariances.

    The model is built with 5 full, 5 compressed and 5 shared features; the
    sklearn reference is evaluated directly on ``self.td.X``. Parameters are
    drawn from the global NumPy generator; no seed is set in this test.
    """
    cov_type = 'diag'

    gmm = GaussianMixture(
        n_components=3,
        num_feat_full=5,
        num_feat_comp=5,
        num_feat_shared=5,
        num_samp=4,
        transform=None,
        mask=None,
        D_indices=None,
        covariance_type=cov_type,
    )
    gmm.fit_sparsifier(X=self.td.X)

    # Random mixture parameters with one variance per component and feature.
    param_shape = (gmm.n_components, gmm.num_feat_comp)
    means = np.random.rand(*param_shape)
    covariances = np.random.rand(*param_shape)
    weights = np.random.rand(gmm.n_components)
    weights /= weights.sum()

    estimated = gmm._estimate_log_prob_resp(weights, means, covariances,
                                            cov_type)
    log_prob_test, log_resp_test, log_prob_norm_test = estimated

    # Build the sklearn reference by hand (there is no tidier way to do it).
    precisions = _compute_precision_cholesky(covariances, cov_type)
    reference = GMSKL(n_components=3, covariance_type=cov_type)
    reference.means_ = means
    reference.precisions_cholesky_ = precisions
    reference.weights_ = weights
    reference.covariance_type_ = cov_type

    per_sample_norm, log_resp_true = reference._estimate_log_prob_resp(
        self.td.X)
    # NOTE: the reference values are collapsed to their mean before the
    # comparison; if something looks wrong later, this step is a suspect.
    log_prob_norm_true = per_sample_norm.mean()

    # The log probabilities come from a separate sklearn helper.
    log_prob_true = _estimate_log_gaussian_prob(self.td.X, means, precisions,
                                                cov_type)

    # Argument order of each comparison is kept exactly as before.
    comparisons = (
        (log_prob_test, log_prob_true),
        (log_prob_norm_true, log_prob_norm_test),
        (log_resp_true, log_resp_test),
    )
    for first, second in comparisons:
        self.assertArrayEqual(first, second)