def test_log_responsibilities(self):
    """
    Test the log responsibilities against the scikit-learn implementation.
    """
    N = 16384
    S = 2048
    D = 128

    means = torch.randn(S, D)
    covs = torch.rand(S)
    x = torch.randn(N, D)
    prior = torch.rand(S)
    prior /= prior.sum()

    mixture = GaussianMixture(S, covariance_type='spherical')
    mixture.means_ = means.numpy()
    mixture.precisions_cholesky_ = np.sqrt(1 / covs.numpy())
    mixture.weights_ = prior.numpy()

    # pylint: disable=protected-access
    _, expected = mixture._estimate_log_prob_resp(x.numpy())
    expected = torch.from_numpy(expected)

    probs = log_normal(x, means, covs, 'spherical')
    predicted = log_responsibilities(probs, prior)

    self.assertTrue(
        torch.allclose(expected, predicted, atol=1e-03, rtol=1e-05))
def log_prob(self, x, t, feature, samples):
    observations = x[:, :t]

    p_s_past = {}  # p(s_{t-1} | X_{0:t-1})
    for st in self.states:
        p_s_past[st], _, _ = fwd_bkw(observations, self.states,
                                     self.start_probability,
                                     self.transition_probability,
                                     self.emission_probability, st)

    # p(s_t | X_{0:t-1}) = sum_{s_{t-1}} p(s_t | s_{t-1}) p(s_{t-1} | X_{0:t-1})
    p_currstate_past = {}
    for s in self.states:
        p_currstate_past[s] = 0.
    for curr_state in self.states:
        for st in self.states:
            p_currstate_past[curr_state] += self.transition_probability[
                curr_state][st] * p_s_past[st]

    # Fit on dummy data so sklearn marks the model as fitted, then
    # overwrite the parameters by hand.
    gmm = GaussianMixture(n_components=len(self.states),
                          covariance_type='full')
    gmm.fit(np.random.randn(10, observations.shape[0]))
    gmm.weights_ = np.array(list(p_currstate_past.values()))
    gmm.means_ = np.array(self.mean)
    gmm.covariances_ = np.array(self.cov)
    for i in range(len(self.states)):
        gmm.precisions_[i] = np.linalg.inv(gmm.covariances_[i])
        # precisions_cholesky_ must factor the precision matrix,
        # not the covariance matrix
        gmm.precisions_cholesky_[i] = np.linalg.cholesky(gmm.precisions_[i])

    return gmm.score_samples(samples)
def sample_gaussian_mixture(pis, sigmas, mus, num_samples, n_features):
    """
    return: array of size (batch_size, num_samples*n_features) containing
    samples drawn from the gaussian mixture parameterized by pis, sigmas, mus,
    e.g.
        input 1 [[ s1_f1, s1_f2, s1_f3 | s2_f1, s2_f2, s2_f3 | ... ],
        input 2  [ s1_f1, s1_f2, s1_f3 | s2_f1, s2_f2, s2_f3 | ... ],
        input 3  [ s1_f1, s1_f2, s1_f3 | s2_f1, s2_f2, s2_f3 | ... ],
        ...      [ ............................................... ],
        input n  [ s1_f1, s1_f2, s1_f3 | s2_f1, s2_f2, s2_f3 | ... ]]
    """
    # Gaussian PDF parameters
    batch_size = pis.shape[0]
    num_mixtures = pis.shape[1]
    samples = np.zeros((batch_size, num_samples * n_features))
    gmm = GaussianMixture(n_components=num_mixtures,
                          covariance_type='spherical')
    gmm.fit(np.random.rand(10, 1))  # dummy fit so sklearn treats the model as trained
    for i in range(batch_size):
        gmm.weights_ = pis[i]
        gmm.means_ = mus[i].reshape(num_mixtures, n_features)
        gmm.covariances_ = np.expand_dims(sigmas[i], axis=1) ** 2
        sample = gmm.sample(num_samples)
        samples[i] = np.ravel(sample[0])
    return Variable(torch.from_numpy(samples))
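# A minimal usage sketch for sample_gaussian_mixture, not from the original
# source: the shapes below are assumptions inferred from the docstring --
# pis (batch, K), sigmas (batch, K), mus (batch, K * n_features).
import numpy as np

pis = np.full((4, 3), 1.0 / 3.0)              # 4 inputs, 3 mixture components
mus = np.random.randn(4, 3 * 2)               # 2 features per component
sigmas = np.abs(np.random.randn(4, 3)) + 0.1  # strictly positive stds
out = sample_gaussian_mixture(pis, sigmas, mus, num_samples=5, n_features=2)
print(out.shape)  # expected: (4, 5 * 2) = (4, 10)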
def get_P_of_Data_Given_Param(means, covs, weights, X, method='scipy'):
    # P(Data | Param)
    samples = list(enumerate(X))  # a list, not a one-shot iterator: we loop twice
    p = {}
    if method == 'scipy':
        g = [
            multivariate_normal(mean=means[k], cov=covs[k],
                                allow_singular=False)
            for k in range(0, len(weights))
        ]
        gaussians = {}
        for index, x in samples:
            gaussians[index] = np.array([g_k.pdf(x) for g_k in g])
        for index, x in samples:
            probabilities = np.multiply(gaussians[index], weights)
            probabilities = probabilities / np.sum(probabilities)
            p[index] = probabilities
    else:
        gmm = GaussianMixture(n_components=len(weights),
                              covariance_type='diag').fit(X)
        gmm.means_ = means
        gmm.covariances_ = covs
        gmm.weights_ = weights
        # keep precisions_cholesky_ consistent with the overridden diagonal
        # covariances (for 'diag' it is simply 1/sqrt(variance))
        gmm.precisions_cholesky_ = 1.0 / np.sqrt(np.asarray(covs))
        for index, x in samples:
            x = x.reshape(1, -1)
            likelihood_ratio = gmm.predict_proba(x)
            # likelihood_ratio = likelihood_ratio / np.sum(likelihood_ratio)
            p[index] = likelihood_ratio
    return p
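# Illustrative call of the sklearn branch (values are made up): predict_proba
# returns the per-component responsibilities p(component | x), so each row
# sums to 1, matching what the hand-rolled scipy branch computes.
import numpy as np

X = np.random.randn(20, 2)
means = np.array([[0.0, 0.0], [1.0, 1.0]])
covs = np.array([[1.0, 1.0], [1.0, 1.0]])  # per-component diagonal variances
weights = np.array([0.4, 0.6])
p = get_P_of_Data_Given_Param(means, covs, weights, X, method='gmm')
print(p[0], p[0].sum())  # responsibilities of sample 0; should sum to 1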
def toGaussianMixture(self):
    g = GaussianMixture(self.n)
    g.fit(np.random.rand(2 * self.n).reshape((-1, 1)))
    g.weights_ = np.array(self.weights)
    g.means_ = np.array(self.means)[:, np.newaxis]
    g.covariances_ = np.array(self.covariances)[:, np.newaxis, np.newaxis]
    return g
def cause(n, k, p1, p2):
    g = GaussianMixture(k, covariance_type='diag')
    g.means_ = p1 * np.random.randn(k, 1)
    # covars_ was the pre-0.18 attribute name; GaussianMixture uses covariances_
    g.covariances_ = np.power(abs(p2 * np.random.randn(k, 1) + 1), 2)
    g.weights_ = abs(np.random.rand(k))  # weights must be a 1-D array
    g.weights_ = g.weights_ / sum(g.weights_)
    # sample() returns a (samples, labels) tuple in the current API
    return scale(g.sample(n)[0])
def get_3d_grid_gmm(subdivisions=[5, 5, 5], variance=0.04):
    """
    Compute the weights, means and covariances of a GMM placed on a 3D grid
    :param subdivisions: 3 element list - number of subdivisions of the 3D space in each axis, forming the grid
    :param variance: scalar variance of each (isotropic, diagonal) Gaussian
    :return gmm: instance of sklearn GaussianMixture (GMM) object
    """
    # n_gaussians = reduce(lambda x, y: x*y, subdivisions)
    n_gaussians = np.prod(np.array(subdivisions))
    step = [
        1.0 / (subdivisions[0]),
        1.0 / (subdivisions[1]),
        1.0 / (subdivisions[2])
    ]

    means = np.mgrid[step[0] - 1:1.0 - step[0]:complex(0, subdivisions[0]),
                     step[1] - 1:1.0 - step[1]:complex(0, subdivisions[1]),
                     step[2] - 1:1.0 - step[2]:complex(0, subdivisions[2])]
    means = np.reshape(means, [3, -1]).T
    covariances = variance * np.ones_like(means)
    weights = (1.0 / n_gaussians) * np.ones(n_gaussians)

    gmm = GaussianMixture(n_components=n_gaussians, covariance_type='diag')
    gmm.weights_ = weights
    gmm.covariances_ = covariances
    gmm.means_ = means
    # NOTE: this private import path is for scikit-learn < 0.22; newer
    # releases moved it to sklearn.mixture._gaussian_mixture
    from sklearn.mixture.gaussian_mixture import _compute_precision_cholesky
    gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'diag')
    return gmm
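# A minimal usage sketch (assumed workflow, not from the original source):
# build the grid GMM and evaluate a random point cloud in [-1, 1]^3.
import numpy as np

grid_gmm = get_3d_grid_gmm(subdivisions=[4, 4, 4], variance=0.04)
points = np.random.uniform(-1.0, 1.0, size=(1000, 3))
log_lik = grid_gmm.score_samples(points)  # per-point log-density
resp = grid_gmm.predict_proba(points)     # (1000, 64) responsibilities
print(log_lik.shape, resp.shape)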
def test_fit(self):
    expected_means = np.array([-55., 0., 7.])
    # note: despite the name, these act as variances; the test below
    # checks sigma**2 against them
    expected_stds = np.array([3., .5, 1.])
    from sklearn.mixture import GaussianMixture
    gmm = GaussianMixture(n_components=3)
    gmm.means_ = expected_means[..., None]
    gmm.covariances_ = np.array(expected_stds[..., None, None])
    gmm.weights_ = np.array([1 / 3, 1 / 3, 1 / 3])
    obs = gmm.sample(100000 + np.random.randint(-3, 3))[0].squeeze()
    init = deeptime.markov.hmm.init.gaussian.from_data(obs,
                                                       n_hidden_states=3,
                                                       reversible=True)
    hmm_est = deeptime.markov.hmm.MaximumLikelihoodHMM(init)
    hmm = hmm_est.fit(obs).fetch_model()
    np.testing.assert_array_almost_equal(
        hmm.transition_model.transition_matrix, np.eye(3), decimal=3)
    m = hmm.output_model
    for mean, sigma in zip(m.means, m.sigmas):
        # find the mean closest to this one (order might have changed)
        mean_ix = np.argmin(np.abs(expected_means - mean))
        np.testing.assert_almost_equal(mean, expected_means[mean_ix],
                                       decimal=1)
        np.testing.assert_almost_equal(sigma * sigma,
                                       expected_stds[mean_ix], decimal=1)
def cluster(data, num_of_clusters):
    x = data[[0, 1]]
    y = data['label']
    means = []
    for clus in range(num_of_clusters):
        clus_x = data[(data['label'] == clus)]
        means.append([clus_x[0].mean(), clus_x[1].mean()])

    # clusterer = KMeans(n_clusters=num_of_clusters)
    # clusterer = GaussianMixture(n_components=num_of_clusters, means_init=means)
    clusterer = GaussianMixture(n_components=num_of_clusters)
    clusterer.fit(x, y)  # y is ignored by GaussianMixture.fit
    print('clusterer means: ')
    print(clusterer.means_)
    for clus in range(num_of_clusters):
        xx, yy = clusterer.means_[clus]
        plt.scatter([xx], [yy], c=colors[clus], marker="s", s=200, alpha=0.5)

    means = np.array(means)
    print('computed means:')
    print(means)
    clusterer.means_ = means
    print('clusterer new means: ')
    print(clusterer.means_)
    for clus in range(num_of_clusters):
        xx, yy = clusterer.means_[clus]
        plt.scatter([xx], [yy], c=colors[clus], marker="d", s=200, alpha=0.5)

    preds = clusterer.predict(x)
    data['pred'] = preds
    draw_features_cluster(data, num_of_clusters)
def create_random_gmm(n_mix, n_features, covariance_type, prng=0):
    prng = check_random_state(prng)
    g = GaussianMixture(n_mix, covariance_type=covariance_type)
    g.means_ = prng.randint(-20, 20, (n_mix, n_features))
    # covars_ is the pre-0.18 name; GaussianMixture exposes covariances_
    g.covariances_ = make_covar_matrix(covariance_type, n_mix, n_features)
    g.weights_ = normalized(prng.rand(n_mix))
    return g
def gmm_scale(gmm, shift=None, scale=None, reverse=False, params=None):
    """
    Apply scaling factors to GMM instances.

    Parameters
    ----------
    gmm : GaussianMixture
        GMM instance to be scaled.
    shift : int, float, optional
        Shift for the entire model. Default is None (no shift).
    scale : int, float, optional
        Scale for all components. Default is None (no scale).
    reverse : bool, optional
        Whether the GMM should be reversed.
    params
        GaussianMixture params for initialization of the new instance.

    Returns
    -------
    GaussianMixture
        Modified GMM instance.
    """
    # Fetch parameters if not supplied
    if params is None:
        # noinspection PyUnresolvedReferences
        params = gmm.get_params()

    # Instantiate new GMM
    gmm_new = GaussianMixture(**params)

    # Create scaled fitted GMM model
    gmm_new.weights_ = gmm.weights_

    # Copy the means so the in-place ops below cannot mutate the input model
    gmm_new.means_ = gmm.means_.copy()

    # Apply shift if set
    if shift is not None:
        gmm_new.means_ += shift

    # Apply scale
    if scale is not None:
        gmm_new.means_ /= scale

    gmm_new.covariances_ = gmm.covariances_ / scale ** 2 if scale is not None \
        else gmm.covariances_
    gmm_new.precisions_ = np.linalg.inv(gmm_new.covariances_) if scale is not None \
        else gmm.precisions_
    gmm_new.precisions_cholesky_ = np.linalg.cholesky(gmm_new.precisions_) if scale is not None \
        else gmm.precisions_cholesky_

    # Reverse if set
    if reverse:
        gmm_new.means_ *= -1

    # Copy the converged attribute if available
    if hasattr(gmm, 'converged_'):
        gmm_new.converged_ = gmm.converged_

    # Return scaled GMM
    return gmm_new
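# A hedged usage sketch for gmm_scale (illustrative values): fit a reference
# GMM on random data, then shift it by +2 and shrink it by a factor of 10.
import numpy as np
from sklearn.mixture import GaussianMixture

gmm_ref = GaussianMixture(n_components=2).fit(np.random.randn(500, 1))
gmm_small = gmm_scale(gmm_ref, shift=2.0, scale=10.0)
print(gmm_ref.means_.ravel(), gmm_small.means_.ravel())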
def computeProb(mfcc):
    def load_gmm_result(prefix):
        # the silence, speech and noise models share this logic,
        # so it is factored out into one helper
        mean = np.loadtxt(prefix + '_mean.txt')
        variance = np.loadtxt(prefix + '_variance.txt')
        weight = np.loadtxt(prefix + '_weight.txt')
        gmm = GaussianMixture(128, covariance_type="diag")
        gmm.means_ = mean
        gmm.weights_ = weight
        gmm.precisions_cholesky_ = gaussian_mixture._compute_precision_cholesky(
            variance, "diag")
        return np.dot(gmm.predict_proba(mfcc), weight.reshape(-1, 1))

    sil_result = load_gmm_result('sil')        # silence
    speech_result = load_gmm_result('speech')  # speech
    noise_result = load_gmm_result('noise')    # noise
    return sil_result, speech_result, noise_result
def gmm_cause(points, k=2, p1=3, p2=4):
    """Init a root cause with a Gaussian Mixture Model w/ a spherical covariance type."""
    g = GMM(k, covariance_type="spherical")
    g.fit(np.random.randn(300, 1))  # dummy fit to initialize the fitted state
    g.means_ = p1 * np.random.randn(k, 1)
    # covars_ is the pre-0.18 attribute; the current API uses covariances_,
    # and 'spherical' expects one variance per component (shape (k,))
    g.covariances_ = np.power(abs(p2 * np.random.randn(k, 1) + 1), 2).flatten()
    g.weights_ = abs(np.random.rand(k))
    g.weights_ = g.weights_ / sum(g.weights_)
    return g.sample(points)[0].reshape(-1)
def return_copy(self):
    '''If anything goes wrong, make sure the means and weights are
    assigned as copies rather than shared references.'''
    copy_tmp = GMM(n_components=self.n_components)
    copy_tmp.covariances_ = self.covariances_  # _get_covars()
    copy_tmp.means_ = self.means_
    copy_tmp.weights_ = self.weights_
    return copy_tmp
def multimod_emd_from_gmm(means, sigmas, weights):
    means_stacked = np.concatenate(means, axis=0)[:, :, 0, 0]
    sigmas_stacked = np.concatenate(sigmas, axis=0)[:, :, 0, 0]
    weights_stacked = np.concatenate(weights, axis=0)[:, 0, 0, 0]

    gmm = GaussianMixture(n_components=4, covariance_type='diag')
    gmm_vars = 2 * sigmas_stacked * sigmas_stacked
    precisions_cholesky = _compute_precision_cholesky(gmm_vars, 'diag')
    gmm.weights_ = weights_stacked
    gmm.means_ = means_stacked
    gmm.precisions_cholesky_ = precisions_cholesky
    gmm.covariances_ = gmm_vars

    y_sampled, _ = gmm.sample(1000)
    return wemd_from_pred_samples(y_sampled)
def get_multimodality_score(means, sigmas, weights):
    means_stacked = np.concatenate(means, axis=0)[:, :, 0, 0]
    sigmas_stacked = np.concatenate(sigmas, axis=0)[:, :, 0, 0]
    weights_stacked = np.concatenate(weights, axis=0)[:, 0, 0, 0]

    gmm = GaussianMixture(n_components=4, covariance_type='diag')
    gmm_vars = 2 * sigmas_stacked * sigmas_stacked  # renamed from vars, which shadows the builtin
    precisions_cholesky = _compute_precision_cholesky(gmm_vars, 'diag')
    gmm.weights_ = weights_stacked
    gmm.means_ = means_stacked
    gmm.precisions_cholesky_ = precisions_cholesky
    gmm.covariances_ = gmm_vars

    # unimodal reference: keep only the highest-weight component
    gmm_uni = GaussianMixture(n_components=1, covariance_type='diag')
    argmax = np.argmax(gmm.weights_)
    gmm_uni.means_ = gmm.means_[argmax, :].reshape([1, 2])
    gmm_uni.covariances_ = gmm.covariances_[argmax, :].reshape([1, 2])
    gmm_uni.precisions_cholesky_ = gmm.precisions_cholesky_[argmax, :].reshape(
        [1, 2])
    gmm_uni.weights_ = np.array([1]).reshape([1])

    Z_uni = compute_histogram_gmm(gmm_uni)
    Z = compute_histogram_gmm(gmm)
    ratio = computeWEMD(Z, Z_uni)
    return ratio
def create_sklearn_gmm(weights, mean_tensor, cov_tensor, random_state=0):
    n_components = len(weights)
    gmm = GaussianMixture(n_components=n_components, covariance_type='full',
                          random_state=random_state)
    gmm.weights_ = weights.numpy()
    gmm.means_ = mean_tensor.numpy()
    gmm.covariances_ = cov_tensor.numpy()
    gmm.precisions_ = np.array(
        [np.linalg.inv(cov) for cov in gmm.covariances_])
    gmm.precisions_cholesky_ = np.array(
        [np.linalg.cholesky(prec) for prec in gmm.precisions_])
    return gmm
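# A hedged usage sketch (assumes torch is available): wrap tensor parameters
# in a fitted sklearn object, then query it like any trained model.
import numpy as np
import torch

weights = torch.tensor([0.3, 0.7])
means = torch.zeros(2, 3)
covs = torch.stack([torch.eye(3), 2 * torch.eye(3)])
gmm = create_sklearn_gmm(weights, means, covs)
print(gmm.score_samples(np.zeros((1, 3))))  # log-density at the origin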
def jsd_diss(self, w1, mu1, cov1, w2, mu2, cov2):
    """
    Calculates Jensen-Shannon divergence of two gmm's
    :param w1, mu1, cov1: weights, means and covariances of the first GMM
    :param w2, mu2, cov2: weights, means and covariances of the second GMM
    :return: Jensen-Shannon divergence
    """
    sample_count = 10 ** 5  # number of Monte Carlo samples (was undefined)

    gmm_p = GaussianMixture(n_components=len(w1), covariance_type="full")
    gmm_p.weights_ = w1
    gmm_p.covariances_ = cov1
    gmm_p.means_ = mu1
    gmm_p.precisions_cholesky_ = _compute_precision_cholesky(cov1, "full")

    gmm_q = GaussianMixture(n_components=len(w2), covariance_type="full")
    gmm_q.weights_ = w2
    gmm_q.covariances_ = cov2
    gmm_q.means_ = mu2
    gmm_q.precisions_cholesky_ = _compute_precision_cholesky(cov2, "full")

    X = gmm_p.sample(sample_count)[0]
    log_p_X = gmm_p.score_samples(X)
    log_q_X = gmm_q.score_samples(X)
    log_mix_X = np.logaddexp(log_p_X, log_q_X)

    Y = gmm_q.sample(sample_count)[0]
    log_p_Y = gmm_p.score_samples(Y)
    log_q_Y = gmm_q.score_samples(Y)
    log_mix_Y = np.logaddexp(log_p_Y, log_q_Y)

    # JSD(P||Q) = 0.5 * E_P[log p - log((p+q)/2)] + 0.5 * E_Q[log q - log((p+q)/2)];
    # log_mix - log(2) is the log of the mixture density (p+q)/2
    return (log_p_X.mean() - (log_mix_X.mean() - np.log(2)) + log_q_Y.mean() -
            (log_mix_Y.mean() - np.log(2))) / 2
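# Sanity-check sketch (illustrative, assumes jsd_diss is reachable as a plain
# function; its self argument is unused, so None is passed): identical
# mixtures should give a Monte Carlo JSD estimate of ~0, and the estimate is
# bounded above by log(2) ~= 0.693.
import numpy as np

w = np.array([0.5, 0.5])
mu = np.array([[-1.0], [1.0]])
cov = np.array([[[1.0]], [[1.0]]])
print(jsd_diss(None, w, mu, cov, w, mu, cov))  # ~0 up to sampling noise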
def generate_equal_weight_GMM(H_mu, H_var, covariance_type='diag'):
    n_components = len(H_mu)
    weights_init = n_components * [1. / n_components]
    GMM = GaussianMixture(n_components=n_components,
                          covariance_type=covariance_type,
                          n_init=1,  # n_init=0 is invalid; sklearn requires at least 1
                          weights_init=None,
                          means_init=None,
                          precisions_init=None,
                          random_state=None,
                          warm_start=True,
                          verbose=0,
                          verbose_interval=10)
    # fit() is never called, so assign the fitted parameters directly
    GMM.weights_ = np.asarray(weights_init)
    GMM.means_ = H_mu
    GMM.covariances_ = H_var
    GMM.precisions_cholesky_ = _compute_precision_cholesky(
        H_var, covariance_type)
    return GMM
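# A hedged usage sketch (values are made up): build an equal-weight mixture
# from per-component means and variances and draw a few samples from it.
import numpy as np

H_mu = np.array([[-2.0], [0.0], [2.0]])
H_var = np.array([[0.5], [0.5], [0.5]])  # 'diag' expects (n_components, n_features)
gmm = generate_equal_weight_GMM(H_mu, H_var)
samples, labels = gmm.sample(10)
print(samples.shape, labels)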
def gmm_loglik(y, pi, mu, sigma, K):
    model = GaussianMixture(K, covariance_type='diag')
    model.fit(y)  # dummy fit; the parameters are overwritten below
    N = np.shape(mu)[0]
    N_test = np.shape(y)[0]
    ll_test = np.zeros(N)
    for i in range(N):
        model.means_ = mu[i, :]
        model.covariances_ = sigma[i, :] ** 2
        model.precisions_ = 1 / (sigma[i, :] ** 2)
        model.weights_ = pi[i, :]
        model.precisions_cholesky_ = _compute_precision_cholesky(
            model.covariances_, model.covariance_type)
        ll_test[i] = model.score(y)  # mean log-likelihood over y
    return ll_test * N_test  # total log-likelihood per parameter set
def generate_params_gmm(weight, mean, cov, use_cdf=False):
    gmm = GaussianMixture(n_components=weight.size)
    gmm.weights_ = weight
    gmm.means_ = mean
    gmm.covariances_ = cov
    # sample() does not use the precisions; setting the attribute is enough
    # to pass sklearn's fitted check
    gmm.precisions_cholesky_ = None
    params = gmm.sample()[0][0]
    if use_cdf:
        params = ndtr(params)  # squash through the standard normal CDF
    else:
        params = np.clip(params, 0, 1)
    params = params.tolist()
    return params
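# A hedged usage sketch (illustrative values): weight/mean/cov follow
# sklearn's 'full' covariance convention; with use_cdf=True the sampled
# vector is mapped through the normal CDF instead of being clipped to [0, 1].
import numpy as np

weight = np.array([0.5, 0.5])
mean = np.array([[0.2, 0.8], [0.5, 0.5]])
cov = np.array([np.eye(2) * 0.01, np.eye(2) * 0.02])
print(generate_params_gmm(weight, mean, cov, use_cdf=False))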
def visualize_latent_space(file_name,
                           z_mean,
                           z_std,
                           x_label='$\\mathbf{z}$',
                           y_label='pdf',
                           show=False):
    """Visualizes approximation ability.

    Args:
        file_name: File name without extension.
        z_mean: ndarray (N, dZ) with latent states.
        z_std: ndarray (N, dZ) with std of latent states.
        x_label: Label of the x-axis.
        y_label: Label of the y-axis.
        show: Display generated plot. This is a blocking operation.
    """
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    ax1.set_xlabel(x_label)
    ax1.set_ylabel(y_label)
    ax1.grid(linestyle=':')

    N, dZ = z_mean.shape
    xs = np.linspace(-3, 3, 1000).reshape(1000, 1)
    plt.plot(xs, sp.stats.norm.pdf(xs), color="black", linestyle=":",
             label='$\\mathcal{N}(0,1)$')

    for dim in range(dZ):
        # Fit GMM by hand: one equally weighted component per latent sample
        gmm = GaussianMixture(N)
        gmm.means_ = z_mean[:, dim].reshape(N, 1)
        gmm.precisions_cholesky_ = (1 / z_std[:, dim]).reshape(N, 1, 1)
        gmm.weights_ = np.ones(N) / N
        ax1.plot(xs, np.exp(gmm.score_samples(xs)), linewidth=1,
                 label='$\\mathbf{z}[%d]$' % dim)

    ax1.legend()
    if file_name is not None:
        fig.savefig(file_name + ".pdf", bbox_inches='tight', pad_inches=0)
    if show:
        plt.show()
    plt.close(fig)
def _sample_rows_same(self, X):
    """ uses efficient sklearn implementation to sample from gaussian mixture
    -> only works if all rows of X are the same """
    weights, locs, scales = self._get_mixture_components(
        np.expand_dims(X[0], axis=0))

    # normalize so the weights sum to exactly 1 (guards against float drift)
    weights = weights.astype(np.float64)
    weights = weights / np.sum(weights)

    gmm = GaussianMixture(n_components=self.n_centers, covariance_type='diag',
                          max_iter=5, tol=1e-1)
    gmm.fit(np.random.normal(size=(100, self.ndim_y)))  # just pretending a fit

    # overriding the GMM parameters with our own params
    gmm.converged_ = True
    gmm.weights_ = weights[0]
    gmm.means_ = locs[0]
    gmm.covariances_ = scales[0]

    y_sample, _ = gmm.sample(X.shape[0])
    assert y_sample.shape == (X.shape[0], self.ndim_y)
    return X, y_sample
def fit(self, df):
    if len(df.columns) > 2:
        print('error: data should have at most 2 dimensions')
        return
    self.data = df
    if self.use_kmeans_init:
        # pass the k-means centers as means_init; assigning gm.means_
        # before fit() would simply be overwritten by EM initialization
        init_means = init_k_means(df, no_clusters=self.no_clusters)
        print(f'k-means clustering initialize: {init_means}')
        gm = GaussianMixture(n_components=self.no_clusters,
                             means_init=init_means, random_state=0)
        gm.fit(df)
    else:
        gm = GaussianMixture(n_components=self.no_clusters,
                             random_state=0).fit(df)
        print('clustering without k-means initialization')
    self.means = gm.means_
    self.variance = gm.covariances_
    self.proportions = gm.weights_
    return self.means, self.variance, self.proportions
def read_pred():
    means = readFloat('%s-mixture_distribution_means.float3' % predition_path)      # shape (4, 2)
    sigmas = readFloat('%s-mixture_distribution_sigmas.float3' % predition_path)    # shape (4, 2)
    weights = readFloat('%s-mixture_distribution_weights.float3' % predition_path)  # shape (4)

    sigmas = 2 * sigmas * sigmas
    gmm = GaussianMixture(n_components=4, covariance_type='diag')
    precisions_cholesky = _compute_precision_cholesky(sigmas, 'diag')
    gmm.weights_ = weights
    gmm.means_ = means
    gmm.precisions_cholesky_ = precisions_cholesky
    gmm.covariances_ = sigmas
    return gmm
def EM_Process(data, n, covt):
    '''
    data: array shaped data.
    n: the number of components.
    covt: covariance_type -- one of 'full', 'tied', 'diag', 'spherical'.
    '''
    # NOTE: the original pre-fit assignments to means_ and covariances_
    # were dropped here; fit() re-initializes the parameters, so they
    # had no effect
    GM = GaussianMixture(n_components=n, covariance_type=covt,
                         max_iter=600, random_state=3)
    GM.fit(data)
    clusters = GM.predict(data)
    return clusters
def train(self, train_data):
    # 1. Create a GMM object and specify the number of components (classes) in the object
    # 2. Fit the model to our training data. NOTE: You may need to reshape with np.reshape(-1,1)
    # 3. Return None
    data = np.array(train_data).reshape(-1, 1)
    gmm = GaussianMixture(n_components=2)
    gmm.fit(data)
    # reorder the components by ascending mean so their indices are stable
    order = gmm.means_.argsort(axis=0)[:, 0]
    gmm.means_ = gmm.means_[order, :]
    gmm.covariances_ = gmm.covariances_[order, :]
    gmm.weights_ = gmm.weights_[order]
    self.__model = gmm
    return
def test_once_by_random_features():
    Xtrain = numpy.random.random_sample((5000)).reshape(-1, 10)
    Xtest = numpy.random.random_sample((500)).reshape(-1, 10)
    gmm_orig = GaussianMixture(n_components=8, random_state=1)
    gmm_copy = GaussianMixture()
    gmm_orig.fit(Xtrain)

    # copy every fitted attribute over to the blank estimator
    gmm_copy.weights_ = gmm_orig.weights_
    gmm_copy.means_ = gmm_orig.means_
    gmm_copy.covariances_ = gmm_orig.covariances_
    gmm_copy.precisions_ = gmm_orig.precisions_
    gmm_copy.precisions_cholesky_ = gmm_orig.precisions_cholesky_
    gmm_copy.converged_ = gmm_orig.converged_
    gmm_copy.n_iter_ = gmm_orig.n_iter_
    gmm_copy.lower_bound_ = gmm_orig.lower_bound_

    y_orig = gmm_orig.score_samples(Xtest)
    y_copy = gmm_copy.score_samples(Xtest)
    return all(y_orig == y_copy)
def test__estimate_log_prob_resp_spherical_shared_compression(self):
    rs = np.random.RandomState(11)
    cov_type = 'spherical'
    gmm = GaussianMixture(n_components=3, num_feat_full=5, num_feat_comp=3,
                          num_feat_shared=3, num_samp=4, transform=None,
                          mask=None, D_indices=None, covariance_type=cov_type,
                          random_state=rs)
    gmm.fit_sparsifier(X=self.td.X)
    means = rs.rand(gmm.n_components, gmm.num_feat_full)
    covariances = rs.rand(gmm.n_components)
    weights = rs.rand(gmm.n_components)
    weights /= weights.sum()
    log_prob_test, log_resp_test, log_prob_norm_test = gmm._estimate_log_prob_resp(
        weights, means, covariances, cov_type)

    # compute sklearn's values; somewhat ugly to do
    precisions = _compute_precision_cholesky(covariances, cov_type)
    gmm_skl = GMSKL(n_components=3, covariance_type=cov_type)
    # the mask must be shared so that mask[0] can be applied to all means
    gmm_skl.means_ = means[:, gmm.mask[0]]
    gmm_skl.precisions_cholesky_ = precisions
    gmm_skl.weights_ = weights
    gmm_skl.covariance_type_ = cov_type
    log_prob_norm_true, log_resp_true = gmm_skl._estimate_log_prob_resp(
        gmm.RHDX)
    # if anything is bad later, this overwrite with the mean seems suspect:
    log_prob_norm_true = log_prob_norm_true.mean()
    # now get the log_prob from another function
    log_prob_true = _estimate_log_gaussian_prob(gmm.RHDX, gmm_skl.means_,
                                                precisions, cov_type)

    # run the tests
    self.assertArrayEqual(log_prob_test, log_prob_true)
    self.assertArrayEqual(log_prob_norm_true, log_prob_norm_test)
    self.assertArrayEqual(log_resp_true, log_resp_test)
def fit_markov_chain(y, plot=False):
    y_0 = y[:-1]
    y_1 = y[1:]
    grad_0 = np.gradient(y_0)
    grad_1 = np.gradient(y_1)
    state_1 = grad_1[np.where(grad_0 < 0)]  # instances where previous gradient was negative
    state_2 = grad_1[np.where(grad_0 > 0)]  # instances where previous gradient was positive
    mean_1, std_1 = stats.norm.fit(state_1)
    mean_2, std_2 = stats.norm.fit(state_2)

    # Reshaping parameters to be suitable for sklearn.GaussianMixture
    means = np.array([mean_1, mean_2]).reshape(2, 1)
    y_GM = np.concatenate((state_2.reshape(-1, 1), state_1.reshape(-1, 1)))
    variances = np.array([std_1 ** 2, std_2 ** 2])

    GM = GaussianMixture(n_components=2, covariance_type='spherical')
    GM.weights_ = np.array([0.5, 0.5])
    GM.means_ = means
    # for 'spherical', covariances_ holds per-component variances,
    # precisions_ their inverses, and precisions_cholesky_ is 1/std
    GM.covariances_ = variances
    GM.precisions_ = 1.0 / variances
    GM.precisions_cholesky_ = 1.0 / np.array([std_1, std_2])
    GM.converged_ = True

    if plot:
        samples = GM.sample(5000)[0]
        fig, ax_list = plt.subplots(3, 1)
        fig.set_size_inches(20, 20)
        ax_list[0].hist(state_1, bins=70)
        ax_list[1].hist(state_2, bins=70)
        lnspc_1 = np.linspace(state_1.min(), state_1.max(), y.shape[0])
        gauss_1 = stats.norm.pdf(lnspc_1, mean_1, std_1)
        lnspc_2 = np.linspace(state_2.min(), state_2.max(), y.shape[0])
        gauss_2 = stats.norm.pdf(lnspc_2, mean_2, std_2)
        ax_list[0].plot(lnspc_1, gauss_1)
        ax_list[1].plot(lnspc_2, gauss_2)
        ax_list[0].scatter(mean_1, 30)
        ax_list[1].scatter(mean_2, 30)
        ax_list[2].hist(samples, bins=100)
        plt.show()
    return GM
def fit_gmm_to_points(points,
                      n_components,
                      mdl,
                      ps=[],
                      num_iter=100,
                      covariance_type='full',
                      min_covar=0.001,
                      init_centers=[],
                      force_radii=-1.0,
                      force_weight=-1.0,
                      mass_multiplier=1.0):
    """fit a GMM to some points. Will return the score and the Akaike score.
    Akaike information criterion for the current model fit. It is a measure
    of the relative quality of the GMM that takes into account the parsimony
    and the goodness of the fit.
    if no particles are provided, they will be created

    points: list of coordinates (python)
    n_components: number of gaussians to create
    mdl: IMP Model
    ps: list of particles to be decorated. if empty, will add
    num_iter: number of EM iterations
    covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'spherical'
    min_covar: assign a minimum value to covariance term. That is used to have more spherical shaped gaussians
    init_centers: initial coordinates of the GMM
    force_radii: fix the radii (spheres only)
    force_weight: fix the weights
    mass_multiplier: multiply the weights of all the gaussians by this value
    dirichlet: use the DGMM fitting (can reduce number of components, takes longer)
    """
    new_sklearn = False
    try:
        from sklearn.mixture import GMM
    except ImportError:
        from sklearn.mixture import GaussianMixture
        new_sklearn = True

    print('creating GMM with n_components', n_components, 'n_iter', num_iter,
          'covar type', covariance_type)
    if new_sklearn:
        # aic() calls size() on points, so it needs to be a numpy array, not a list
        points = np.array(points)
        weights_init = precisions_init = None
        if force_radii != -1.0:
            print('warning: radii can no longer be forced, but setting '
                  'initial values to ', force_radii)
            precisions_init = np.array([[1. / force_radii] * 3
                                        for i in range(n_components)])
        if force_weight != -1.0:
            print('warning: weights can no longer be forced, but setting '
                  'initial values to ', force_weight)
            weights_init = np.array([force_weight] * n_components)

        gmm = GaussianMixture(n_components=n_components,
                              max_iter=num_iter,
                              covariance_type=covariance_type,
                              weights_init=weights_init,
                              precisions_init=precisions_init,
                              means_init=None if init_centers == []
                              else init_centers)
    else:
        params = 'm'
        init_params = 'm'
        if force_radii == -1.0:
            params += 'c'
            init_params += 'c'
        else:
            covariance_type = 'spherical'
            print('forcing spherical with radii', force_radii)

        if force_weight == -1.0:
            params += 'w'
            init_params += 'w'
        else:
            print('forcing weights to be', force_weight)

        gmm = GMM(n_components=n_components, n_iter=num_iter,
                  covariance_type=covariance_type, min_covar=min_covar,
                  params=params, init_params=init_params)
        if force_weight != -1.0:
            gmm.weights_ = np.array([force_weight] * n_components)
        if force_radii != -1.0:
            gmm.covars_ = np.array([[force_radii] * 3
                                    for i in range(n_components)])
        if init_centers != []:
            gmm.means_ = init_centers

    print('fitting')
    model = gmm.fit(points)
    score = gmm.score(points)
    akaikescore = model.aic(points)
    # print('>>> GMM score', gmm.score(points))

    # convert format to core::Gaussian
    if new_sklearn:
        covars = gmm.covariances_
    else:
        covars = gmm.covars_
    for ng in range(n_components):
        covar = covars[ng]
        if covar.size == 3:
            covar = np.diag(covar).tolist()
        else:
            covar = covar.tolist()
        center = list(gmm.means_[ng])
        weight = mass_multiplier * gmm.weights_[ng]
        if ng >= len(ps):
            ps.append(IMP.Particle(mdl))
        shape = IMP.algebra.get_gaussian_from_covariance(
            covar, IMP.algebra.Vector3D(center))
        g = IMP.core.Gaussian.setup_particle(ps[ng], shape)
        IMP.atom.Mass.setup_particle(ps[ng], weight)
        IMP.core.XYZR.setup_particle(ps[ng], sqrt(max(g.get_variances())))
    return (score, akaikescore)