Example No. 1
    def _onehot_to_initialparams(self, X, onehot, cov_type):
        """
        Computes cluster weights, cluster means and cluster precisions from
        a given clustering.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.
        onehot : ndarray, shape (n_samples, n_clusters)
            Each row has a 1 indicating cluster membership, other entries are 0.
        cov_type : {'full', 'tied', 'diag', 'spherical'}
            Covariance type for Gaussian mixture model
        """
        n = X.shape[0]
        weights, means, covariances = _estimate_gaussian_parameters(
            X, onehot, 1e-06, cov_type)
        weights /= n

        precisions_cholesky_ = _compute_precision_cholesky(
            covariances, cov_type)

        if cov_type == "tied":
            c = precisions_cholesky_
            precisions = np.dot(c, c.T)
        elif cov_type == "diag":
            precisions = precisions_cholesky_
        else:
            precisions = [np.dot(c, c.T) for c in precisions_cholesky_]

        return weights, means, precisions
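For context, a minimal sketch (not from this example's repository) of how the one-hot matrix the helper expects can be built from hard cluster labels, e.g. a KMeans assignment:

import numpy as np
from sklearn.cluster import KMeans

X = np.random.RandomState(0).rand(200, 3)
labels = KMeans(n_clusters=4, n_init=10, random_state=0).fit_predict(X)
onehot = np.zeros((X.shape[0], 4))
onehot[np.arange(X.shape[0]), labels] = 1.0  # one 1 per row marks membership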
Example No. 2
    def __post_init__(self):
        D = self.mean.shape[-1]
        # Spherical model: `covariance` holds one scalar variance per
        # component, so the 'diag' helper gives the elementwise factor
        # 1/sqrt(var), and the log-determinant then uses the 'spherical'
        # convention (D * log chol).
        c = np.reshape(self.covariance, (-1,))
        pc = _compute_precision_cholesky(c, 'diag')
        self.precision_cholesky = np.reshape(pc, self.covariance.shape)
        self.log_det_precision_cholesky = _compute_log_det_cholesky(
            pc, 'spherical', D)
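Example No. 3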
def test_suffstat_sk_tied():
    # check the identity sum_k (Nk * Sk) / N = S_tied (derivation below)
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    resp = rng.rand(n_samples, n_components)
    resp = resp / resp.sum(axis=1)[:, np.newaxis]
    X = rng.rand(n_samples, n_features)
    nk = resp.sum(axis=0)
    xk = np.dot(resp.T, X) / nk[:, np.newaxis]

    covars_pred_full = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0)
    covars_pred_full = np.sum(nk[:, np.newaxis, np.newaxis] * covars_pred_full,
                              0) / n_samples

    covars_pred_tied = _estimate_gaussian_covariances_tied(resp, X, nk, xk, 0)

    ecov = EmpiricalCovariance()
    ecov.covariance_ = covars_pred_full
    assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='frobenius'), 0)
    assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='spectral'), 0)

    # check the precision computation
    precs_chol_pred = _compute_precision_cholesky(covars_pred_tied, 'tied')
    precs_pred = np.dot(precs_chol_pred, precs_chol_pred.T)
    precs_est = linalg.inv(covars_pred_tied)
    assert_array_almost_equal(precs_est, precs_pred)
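A short derivation of the identity used above (a note for reference, not part of the original test): since the responsibilities satisfy $\sum_k r_{ik} = 1$ and $\sum_i r_{ik} x_i = N_k \mu_k$,

$$\sum_k N_k S_k = \sum_k \sum_i r_{ik}\,(x_i - \mu_k)(x_i - \mu_k)^\top = X^\top X - \sum_k N_k \mu_k \mu_k^\top = N\, S_{\mathrm{tied}},$$

which is exactly the weighted average accumulated into covars_pred_full.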
Example No. 5
    def _m_step(self, X, log_resp):
        """M step.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
        log_resp : array-like, shape (n_samples, n_components)
            Logarithm of the posterior probabilities (or responsibilities) of
            the point of each sample in X.
        """
        n_samples, _ = X.shape
        self.weights_, self.means_, self.covariances_ = (
            _estimate_gaussian_parameters(X, np.exp(log_resp), self.reg_covar,
                                          self.covariance_type))

        ### Bound covariance
        # NOTE: `action_bound` and `sigma_bound` are free variables here,
        # assumed to be module-level constants of the surrounding project;
        # they clip the means and keep the variances in exp(+/- 2*sigma_bound).
        self.means_ = np.clip(self.means_, -action_bound, action_bound)
        self.covariances_ = np.clip(self.covariances_,
                                    np.exp(-2 * sigma_bound),
                                    np.exp(2 * sigma_bound))
        ###

        self.weights_ /= n_samples
        self.precisions_cholesky_ = _compute_precision_cholesky(
            self.covariances_, self.covariance_type)
Example No. 6
    def _initialize(self, X, resp):
        # TODO: Initialize A * A.T
        """Initialization of the Gaussian mixture parameters.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
        resp : array-like, shape (n_samples, n_components)
        """
        n_samples, _ = X.shape
        self.y_sub = self._estimate_subspace_repr(X)

        weights, means, covariances = _estimate_gaussian_parameters(
            self.y_sub, resp, self.reg_covar, self.covariance_type)
        weights /= n_samples

        self.weights_ = (weights
                         if self.weights_init is None else self.weights_init)
        self.means_ = means if self.means_init is None else self.means_init

        if self.precisions_init is None:
            self.covariances_ = covariances
            self.precisions_cholesky_ = _compute_precision_cholesky(
                covariances, self.covariance_type)
        elif self.covariance_type == 'full':
            self.precisions_cholesky_ = np.array([
                linalg.cholesky(prec_init, lower=True)
                for prec_init in self.precisions_init
            ])
        elif self.covariance_type == 'tied':
            self.precisions_cholesky_ = linalg.cholesky(self.precisions_init,
                                                        lower=True)
        else:
            self.precisions_cholesky_ = self.precisions_init
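Example No. 7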
    def _m_step(self, Y, log_resp):
        """M step.

        Parameters
        ----------
        Y : array-like, shape (n_samples, n_features)

        log_resp : array-like, shape (n_samples, n_components)
            Logarithm of the posterior probabilities (or responsibilities) of
            the point of each sample in Y.
        """
        Z = self._draw_conditionnal_Z(Y)
        while not self.threshold(Z, Y.shape[1]):  # threshold condition
            Z = self._draw_conditionnal_Z(Y)
            print("Adjusting to the threshold")

        n_samples, _ = Y.shape
        self.weights_, self.means_, self.covariances_ = (
            _estimate_gaussian_parameters(Y, Z, self.reg_covar,
                                          self.covariance_type))
        self.weights_ /= n_samples
        self.precisions_cholesky_ = _compute_precision_cholesky(
            self.covariances_, self.covariance_type)

        self._m_step_callback(Y)
Example No. 8
    def train(self, data):
        if self.initialized:

            ff_tmp = self.params['forgetting_factor'].get_value()

            self.short_term_model = IGMM(self.params['init_components'])
            self.short_term_model.get_best_gmm(
                data, lims=[1, self.params['max_step_components']])
            # lims=[self.params['init_components'], self.params['max_step_components']])
            weights_st = self.short_term_model.weights_
            weights_st = ff_tmp * weights_st
            self.short_term_model.weights_ = weights_st

            #print(ff_tmp)
            weights_lt = self.weights_
            weights_lt = (self.weights_.sum() - ff_tmp
                          ) * weights_lt  # Regularization to keep sum(w)=1.0

            self.weights_ = weights_lt

            gmm_new = copy.deepcopy(self.short_term_model)

            gmm_new = self.merge_similar_gaussians_in_gmm_minim(gmm_new)
            self.mergeGMM(gmm_new)

            self.weights_ = self.weights_ / sum(self.weights_)  #Regularization

        else:
            #self.get_best_gmm(data, lims=[self.params['init_components'], self.params['max_step_components']])
            self.get_best_gmm(data,
                              lims=[
                                  self.params['init_components'],
                                  self.params['init_components']
                              ])
            self.short_term_model = GMM(self.n_components)
            self.initialized = True

        self.precisions_cholesky_ = _compute_precision_cholesky(
            self.covariances_, "full")
        if self.params['infer_fixed']:
            y_dims = self.params['y_dims']
            x_dims = self.params['x_dims']
            SIGMA_YY_inv = np.zeros(
                (self.n_components, len(y_dims), len(y_dims)))
            SIGMA_XY = np.zeros((self.n_components, len(x_dims), len(y_dims)))

            for k, (Mu, Sigma) in enumerate(zip(self.means_,
                                                self.covariances_)):
                Sigma_yy = Sigma[:, y_dims]
                Sigma_yy = Sigma_yy[y_dims, :]

                Sigma_xy = Sigma[x_dims, :]
                Sigma_xy = Sigma_xy[:, y_dims]
                Sigma_yy_inv = linalg.inv(Sigma_yy)

                SIGMA_YY_inv[k, :, :] = Sigma_yy_inv
                SIGMA_XY[k, :, :] = Sigma_xy

            self.SIGMA_YY_inv = SIGMA_YY_inv
            self.SIGMA_XY = SIGMA_XY
Example No. 9
def get_3d_grid_gmm(subdivisions=[5, 5, 5], variance=0.04):
    """
    Compute the weight, mean and covariance of a gmm placed on a 3D grid
    :param subdivisions: 2 element list of number of subdivisions of the 3D space in each axes to form the grid
    :param variance: scalar for spherical gmm.p
    :return gmm: gmm: instance of sklearn GaussianMixture (GMM) object Gauassian mixture model
    """
    # n_gaussians = reduce(lambda x, y: x*y,subdivisions)
    n_gaussians = np.prod(np.array(subdivisions))
    step = [
        1.0 / (subdivisions[0]), 1.0 / (subdivisions[1]),
        1.0 / (subdivisions[2])
    ]

    means = np.mgrid[step[0] - 1:1.0 - step[0]:complex(0, subdivisions[0]),
                     step[1] - 1:1.0 - step[1]:complex(0, subdivisions[1]),
                     step[2] - 1:1.0 - step[2]:complex(0, subdivisions[2])]
    means = np.reshape(means, [3, -1]).T
    covariances = variance * np.ones_like(means)
    weights = (1.0 / n_gaussians) * np.ones(n_gaussians)
    gmm = GaussianMixture(n_components=n_gaussians, covariance_type='diag')
    gmm.weights_ = weights
    gmm.covariances_ = covariances
    gmm.means_ = means
    from sklearn.mixture.gaussian_mixture import _compute_precision_cholesky
    gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'diag')
    return gmm
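A small usage sketch (assumed, not from the original repository): such grid GMMs are typically evaluated on point clouds normalized to [-1, 1]^3, e.g. as the soft-assignment step of a Fisher-vector-style descriptor:

import numpy as np

points = np.random.RandomState(1).uniform(-1, 1, size=(1024, 3))
gmm = get_3d_grid_gmm(subdivisions=[5, 5, 5], variance=0.04)
mean_ll = gmm.score(points)        # mean log-likelihood per point
resp = gmm.predict_proba(points)   # (1024, 125) soft assignments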
Example No. 10
def initialize_params(data, one_hot, cov):
    """
    sklearn's Gaussian Mixture does not allow initialization from class membership
    but it does allow from initialization of mixture parameters, so here we calculate
    the mixture parameters according to class membership
    input:
        data - nxd numpy array 
        one_hot - nxd numpy array with a single one in each row indicating cluster
            membership
        k - number of clusters
    output:
        weights - k array of mixing weights
        means - kxd array of means of mixture components
        precisions - precision matrices, format depends on the EM clustering option
            (eg 'full' mode needs a list of matrices, one for each mixture
            component,but 'tied' mode only needs a single matrix, since all
            precisions are constrained to be equal)
    """

    n = data.shape[0]
    weights, means, covariances = _estimate_gaussian_parameters(
        data, one_hot, 1e-06, cov
    )
    weights /= n

    precisions_cholesky_ = _compute_precision_cholesky(covariances, cov)
    if cov == "tied":
        c = precisions_cholesky_
        precisions = np.dot(c, c.T)
    elif cov == "diag":
        precisions = precisions_cholesky_
    else:
        precisions = [np.dot(c, c.T) for c in precisions_cholesky_]

    return weights, means, precisions
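A hypothetical end-to-end usage, warm-starting sklearn's EM from a hard clustering (note the private helpers above live under sklearn.mixture._gaussian_mixture in recent releases, so the import path varies by version):

import numpy as np
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture

data = np.random.RandomState(0).rand(300, 2)
labels = KMeans(n_clusters=3, n_init=10, random_state=0).fit_predict(data)
one_hot = np.eye(3)[labels]  # n x k membership matrix
weights, means, precisions = initialize_params(data, one_hot, "full")
gmm = GaussianMixture(n_components=3, covariance_type="full",
                      weights_init=weights, means_init=means,
                      precisions_init=np.asarray(precisions))
gmm.fit(data)  # EM now starts from the membership-derived parameters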
Example No. 11
    def loglikelihood(self, means, diagCovs, weights):
        self.evaluator.weights_ = weights
        self.evaluator.covariances_ = diagCovs
        self.evaluator.means_ = means
        self.evaluator.precisions_cholesky_ = _compute_precision_cholesky(
            diagCovs, "diag")

        # score() returns the mean per-sample log-likelihood, so scaling by
        # the number of points yields the total log-likelihood.
        return self.numPoints * np.sum(self.evaluator.score(self.Xpoints))
Example No. 12
    def __post_init__(self):
        D = self.mean.shape[-1]
        c = np.reshape(self.covariance, (-1, D, D))
        pc = _compute_precision_cholesky(c, 'full')
        self.precision_cholesky = np.reshape(pc, self.covariance.shape)
        self.log_det_precision_cholesky = np.reshape(
            _compute_log_det_cholesky(pc, 'full', D),
            self.covariance.shape[:-2])
Example No. 13
def computeProb(mfcc):
    # sil
    sil_mean = np.loadtxt('sil_mean.txt')
    sil_variance = np.loadtxt('sil_variance.txt')
    sil_weight = np.loadtxt('sil_weight.txt')
    sil_gmm = GaussianMixture(128, covariance_type="diag")
    sil_precisions_cholesky_ = gaussian_mixture._compute_precision_cholesky(
        sil_variance, "diag")

    sil_gmm.means_ = sil_mean
    sil_gmm.weights_ = sil_weight
    sil_gmm.precisions_cholesky_ = sil_precisions_cholesky_
    sil_result = np.dot(sil_gmm.predict_proba(mfcc), sil_weight.reshape(-1, 1))

    # speech
    speech_mean = np.loadtxt('speech_mean.txt')
    speech_variance = np.loadtxt('speech_variance.txt')
    speech_weight = np.loadtxt('speech_weight.txt')
    speech_gmm = GaussianMixture(128, covariance_type="diag")
    speech_precisions_cholesky_ = gaussian_mixture._compute_precision_cholesky(
        speech_variance, "diag")

    speech_gmm.means_ = speech_mean
    speech_gmm.weights_ = speech_weight
    speech_gmm.precisions_cholesky_ = speech_precisions_cholesky_
    speech_result = np.dot(speech_gmm.predict_proba(mfcc),
                           speech_weight.reshape(-1, 1))

    # noise
    noise_mean = np.loadtxt('noise_mean.txt')
    noise_variance = np.loadtxt('noise_variance.txt')
    noise_weight = np.loadtxt('noise_weight.txt')
    noise_gmm = GaussianMixture(128, covariance_type="diag")
    noise_precisions_cholesky_ = gaussian_mixture._compute_precision_cholesky(
        noise_variance, "diag")

    noise_gmm.means_ = noise_mean
    noise_gmm.weights_ = noise_weight
    noise_gmm.precisions_cholesky_ = noise_precisions_cholesky_
    noise_result = np.dot(noise_gmm.predict_proba(mfcc),
                          noise_weight.reshape(-1, 1))

    return sil_result, speech_result, noise_result
Example No. 14
    def __init__(self, gmm, swap=False, diff=False):
        # D: static + delta dim
        D = gmm.means_.shape[1] // 2
        self.num_mixtures = gmm.means_.shape[0]
        self.weights = gmm.weights_

        # Split source and target parameters from joint GMM
        self.src_means = gmm.means_[:, :D]
        self.tgt_means = gmm.means_[:, D:]
        self.covarXX = gmm.covariances_[:, :D, :D]
        self.covarXY = gmm.covariances_[:, :D, D:]
        self.covarYX = gmm.covariances_[:, D:, :D]
        self.covarYY = gmm.covariances_[:, D:, D:]

        if diff:
            self.tgt_means = self.tgt_means - self.src_means
            self.covarYY = self.covarXX + self.covarYY - self.covarXY - self.covarYX
            self.covarXY = self.covarXY - self.covarXX
            self.covarYX = self.covarXY.transpose(0, 2, 1)

        # swap src and target parameters
        if swap:
            self.tgt_means, self.src_means = self.src_means, self.tgt_means
            self.covarYY, self.covarXX = self.covarXX, self.covarYY
            self.covarYX, self.covarXY = self.covarXY, self.covarYX

        # p(x), which is used to compute posterior prob. for a given source
        # spectral feature in mapping stage.
        self.px = sklearn.mixture.GaussianMixture(
            n_components=self.num_mixtures, covariance_type="full")
        self.px.means_ = self.src_means
        self.px.covariances_ = self.covarXX
        self.px.weights_ = self.weights
        self.px.precisions_cholesky_ = _compute_precision_cholesky(
            self.px.covariances_, "full")
Example No. 15
    def _set_pX(self):
        # probability density function of X
        self.pX = sklearn.mixture.GaussianMixture(n_components=self.n_mix,
                                                  covariance_type='full')
        self.pX.weights_ = self.w
        self.pX.means_ = self.meanX
        self.pX.covariances_ = self.covXX

        # the following attribute is required to estimate the posterior
        self.pX.precisions_cholesky_ = _compute_precision_cholesky(
            self.covXX, 'full')
        return
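Example No. 16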
def test_suffstat_sk_full():
    # compare the precision matrix computed from the
    # EmpiricalCovariance.covariance fitted on X*sqrt(resp)
    # with _sufficient_sk_full, n_components=1
    rng = np.random.RandomState(0)
    n_samples, n_features = 500, 2

    # special case 1, assuming data is "centered"
    X = rng.rand(n_samples, n_features)
    resp = rng.rand(n_samples, 1)
    X_resp = np.sqrt(resp) * X
    nk = np.array([n_samples])
    xk = np.zeros((1, n_features))
    covars_pred = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0)
    ecov = EmpiricalCovariance(assume_centered=True)
    ecov.fit(X_resp)
    assert_almost_equal(ecov.error_norm(covars_pred[0], norm='frobenius'), 0)
    assert_almost_equal(ecov.error_norm(covars_pred[0], norm='spectral'), 0)

    # check the precision computation
    precs_chol_pred = _compute_precision_cholesky(covars_pred, 'full')
    precs_pred = np.array([np.dot(prec, prec.T) for prec in precs_chol_pred])
    precs_est = np.array([linalg.inv(cov) for cov in covars_pred])
    assert_array_almost_equal(precs_est, precs_pred)

    # special case 2, assuming resp are all ones
    resp = np.ones((n_samples, 1))
    nk = np.array([n_samples])
    xk = X.mean(axis=0).reshape((1, -1))
    covars_pred = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0)
    ecov = EmpiricalCovariance(assume_centered=False)
    ecov.fit(X)
    assert_almost_equal(ecov.error_norm(covars_pred[0], norm='frobenius'), 0)
    assert_almost_equal(ecov.error_norm(covars_pred[0], norm='spectral'), 0)

    # check the precision computation
    precs_chol_pred = _compute_precision_cholesky(covars_pred, 'full')
    precs_pred = np.array([np.dot(prec, prec.T) for prec in precs_chol_pred])
    precs_est = np.array([linalg.inv(cov) for cov in covars_pred])
    assert_array_almost_equal(precs_est, precs_pred)
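A note on why the products above recover the inverse covariance: for 'full' covariances sklearn stores $P_{\mathrm{chol}} = L^{-\top}$, where $\Sigma = L L^\top$ is the Cholesky factorization, so

$$P_{\mathrm{chol}} P_{\mathrm{chol}}^\top = L^{-\top} L^{-1} = (L L^\top)^{-1} = \Sigma^{-1}.$$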
Example No. 18
def get_gmm(weights, means, covariances):
    gmm = mixture.GaussianMixture(weights.size)

    # set parameters
    gmm.weights_ = weights
    gmm.means_ = means
    gmm.covariances_ = covariances
    gmm.covariance_type = 'full'

    # compute cholesky precisions
    covariance_data = (gmm.covariances_, gmm.covariance_type)
    gmm.precisions_cholesky_ = _compute_precision_cholesky(*covariance_data)
    return gmm
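Example No. 19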
def multimod_emd_from_gmm(means, sigmas, weights):
    means_stacked = np.concatenate(means, axis=0)[:, :, 0, 0]
    sigmas_stacked = np.concatenate(sigmas, axis=0)[:, :, 0, 0]
    weights_stacked = np.concatenate(weights, axis=0)[:, 0, 0, 0]
    gmm = GaussianMixture(n_components=4, covariance_type='diag')
    gmm_vars = 2 * sigmas_stacked * sigmas_stacked
    precisions_cholesky = _compute_precision_cholesky(gmm_vars, 'diag')
    gmm.weights_ = weights_stacked
    gmm.means_ = means_stacked
    gmm.precisions_cholesky_ = precisions_cholesky
    gmm.covariances_ = gmm_vars
    y_sampled, _ = gmm.sample(1000)
    return wemd_from_pred_samples(y_sampled)
Example No. 20
    def jsd_diss(self, w1, mu1, cov1, w2, mu2, cov2):
        """
		Calculates Jensen-Shannon divergence of two gmm's
		:param gmm_p: mixture.GaussianMixture
		:param gmm_q: mixture.GaussianMixture
		:param sample_count: number of monte carlo samples to use
		:return: Jensen-Shannon divergence
		"""
        gmm_p = GaussianMixture(n_components=n_components,
                                covariance_type="full")
        gmm_p.weights_ = w1
        gmm_p.covariances_ = cov1
        gmm_p.means_ = mu1
        gmm_p.n_components = 1
        gmm_p.precisions_cholesky_ = _compute_precision_cholesky(cov1, "full")

        gmm_q = GaussianMixture(n_components=n_components,
                                covariance_type="full")
        gmm_q.weights_ = w2
        gmm_q.covariances_ = cov2
        gmm_q.means_ = mu2
        gmm_q.n_components = 1
        gmm_q.precisions_cholesky_ = _compute_precision_cholesky(cov2, "full")

        X = gmm_p.sample(sample_count)[0]
        log_p_X = gmm_p.score_samples(X)
        log_q_X = gmm_q.score_samples(X)
        log_mix_X = np.logaddexp(log_p_X, log_q_X)

        Y = gmm_q.sample(sample_count)[0]
        log_p_Y = gmm_p.score_samples(Y)
        log_q_Y = gmm_q.score_samples(Y)
        log_mix_Y = np.logaddexp(log_p_Y, log_q_Y)

        # Monte Carlo estimate of the Jensen-Shannon divergence; see the
        # note after this example.
        return (log_p_X.mean() -
                (log_mix_X.mean() - np.log(2)) + log_q_Y.mean() -
                (log_mix_Y.mean() - np.log(2))) / 2
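With $M = (P + Q)/2$, the value returned above is the standard Monte Carlo estimator of the Jensen-Shannon divergence (a note for reference, not part of the original code):

$$\mathrm{JSD}(P \,\|\, Q) = \tfrac{1}{2}\,\mathrm{KL}(P \,\|\, M) + \tfrac{1}{2}\,\mathrm{KL}(Q \,\|\, M) \approx \tfrac{1}{2}\big[\overline{\log p(X)} - \overline{\log m(X)}\big] + \tfrac{1}{2}\big[\overline{\log q(Y)} - \overline{\log m(Y)}\big],$$

with $X \sim P$, $Y \sim Q$; np.logaddexp(log_p, log_q) - np.log(2) evaluates $\log m$ pointwise.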
Example No. 21
def gmm_loglik(y, pi, mu, sigma, K):
    # fit once only so sklearn marks the model as fitted; the parameters
    # are overwritten inside the loop below
    model = GaussianMixture(K, covariance_type='diag')
    model.fit(y)
    N = np.shape(mu)[0]
    N_test = np.shape(y)[0]
    ll_test = np.zeros(N)
    for i in range(N):
        model.means_ = mu[i, :]
        model.covariances_ = sigma[i, :]**2
        model.precisions_ = 1 / (sigma[i, :]**2)
        model.weights_ = pi[i, :]
        model.precisions_cholesky_ = _compute_precision_cholesky(
            model.covariances_, model.covariance_type)
        # score() is the mean log-likelihood per sample
        ll_test[i] = model.score(y)
    return ll_test * N_test  # scale to total log-likelihood per draw
Example No. 22
    def _set_pX(self):
        # probability density function of X
        self.pX = sklearn.mixture.GaussianMixture(
            n_components=self.n_mix, covariance_type=self.covtype)
        self.pX.weights_ = self.w
        self.pX.means_ = self.meanX
        self.pX.covariances_ = self.covXX

        # the following attribute is required to estimate the posterior
        # P(X | \lambda^(X))
        self.pX.precisions_cholesky_ = _compute_precision_cholesky(
            self.covXX, self.covtype)

        return
Example No. 23
def generate_equal_weight_GMM(H_mu, H_var, covariance_type='diag'):
    n_components = len(H_mu)
    weights_init = len(H_mu) * [1. / len(H_mu)]
    # n_init must be >= 1 in sklearn (it is irrelevant here anyway, since
    # fit() is never called on this instance)
    GMM = GaussianMixture(n_components=n_components,
                          covariance_type=covariance_type, n_init=1, weights_init=None, means_init=None,
                          precisions_init=None, random_state=None, warm_start=True, verbose=0,
                          verbose_interval=10)
    GMM.weights_ = weights_init
    GMM.means_ = H_mu
    GMM.covariances_ = H_var
    GMM.precisions_cholesky_ = _compute_precision_cholesky(
        H_var, covariance_type)

    return GMM
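A hypothetical usage sketch (array names invented for illustration): wrapping per-component means and variances into an equal-weight mixture that can be sampled directly, since all the fitted attributes are set by the helper above.

import numpy as np

H_mu = np.random.RandomState(0).randn(10, 16)   # 10 components in 16 dims
H_var = 0.1 * np.ones_like(H_mu)                # diagonal variances
prior = generate_equal_weight_GMM(H_mu, H_var, covariance_type='diag')
z, comp = prior.sample(64)  # draw 64 points and their component labels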
Example No. 24
    def _m_step(self, X, log_resp):
        """M step.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
        log_resp : array-like, shape (n_samples, n_components)
            Logarithm of the posterior probabilities (or responsibilities) of
            the point of each sample in X.
        """
        n_samples, n_features = X.shape
        self.weights_, self.mu, self.covariances_ = (
            _estimate_gaussian_parameters(X, np.exp(log_resp), self.reg_covar,
                                          self.covariance_type))

        # update the means by fused lasso
        print("*************update means by fused lasso now*****************")
        r_ic = np.exp(log_resp)

        for i in range(self.n_components):
            idx = np.where(np.argmax(r_ic, axis=1) == i)

            print("len(idx):", len(idx[0]))
            # ensure there is enough data to fit the fused lasso
            if len(idx[0]) > (n_samples / (2 * self.n_components)):
                print("fused lasso used")
                data_X_i = r.matrix(X[idx[0]], nrow=len(idx[0]), ncol=n_features)
                data_Y_i = r.matrix(self.Y[idx[0]], nrow=len(idx[0]), ncol=1)
                n = r.nrow(data_X_i)
                p = r.ncol(data_X_i)
                print("lasso_n:", n)
                print("lasso_p:", p)
                results = r.fusedlasso1d(y=data_Y_i, X=data_X_i)
                result = np.array(r.coef(results, np.sqrt(n * np.log(p)))[0])[:, -1]
                mu_i = np.multiply(result, np.mean(data_X_i, axis=0))
                if i == 0:
                    self.means_ = mu_i
                else:
                    self.means_ = np.vstack((self.means_, mu_i))
            else:
                print("not enough data for fused lasso")
                if i == 0:
                    self.means_ = self.mu[i]
                else:
                    self.means_ = np.vstack((self.means_, self.mu[i]))

        self.weights_ /= n_samples
        self.precisions_cholesky_ = _compute_precision_cholesky(
            self.covariances_, self.covariance_type)
Example No. 25
def _create_gmm(k, means, weights, precisions=None, covariances=None):
    if covariances is None:
        precisions = np.array(precisions)
        covariances = np.linalg.pinv(precisions)
    elif precisions is None:
        covariances = np.array(covariances)
        precisions = np.linalg.pinv(covariances)

    gmm = GaussianMixture(n_components=k,
                          weights_init=weights,
                          means_init=means,
                          reg_covar=1e-2,
                          precisions_init=precisions,
                          max_iter=1,
                          warm_start=True)

    try:
        gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances,
                                                               'full')
    except Exception:
        c2 = covariances.copy()
        covariances = _singular_prevent_multiple(covariances)
        precisions = np.linalg.pinv(covariances)
        try:
            gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances,
                                                                   'full')
        except Exception:
            c2.dump('cov.npy')
            raise Exception('Problem with the covariance matrix! Dumped to cov.npy')

    gmm.weights_ = weights
    gmm.means_ = means
    gmm.covariances_ = covariances
    gmm.precisions_ = precisions

    return gmm
Example No. 26
def lppd(y, pi, mu, sigma, K):  #calculate posterior predictive of test
    model = skgm.GaussianMixture(K, covariance_type='diag')
    B = np.shape(mu)[0]
    N_test = np.shape(y)[0]
    ll_test = np.zeros((B, N_test))
    model.fit(y, np.ones(N_test))
    for i in range(B):
        model.means_ = mu[i, :]
        model.covariances_ = sigma[i, :]**2
        model.precisions_ = 1 / (sigma[i, :]**2)
        model.weights_ = pi[i, :]
        model.precisions_cholesky_ = _compute_precision_cholesky(
            model.covariances_, model.covariance_type)
        # NOTE: score_lppd is not part of stock sklearn; it is presumably a
        # project-specific patch returning per-sample log-likelihoods
        ll_test[i] = model.score_lppd(y)
    lppd_test = np.sum(sp.special.logsumexp(ll_test, axis=0) - np.log(B))
    return lppd_test
Example No. 27
    def _m_step(self, X, resp):
        """M step.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)

        resp : array-like, shape (n_samples, n_components)
            Posterior probabilities (or responsibilities) of
            the point of each sample in X.
        """
        n_samples, _ = X.shape
        self.weights_, self.means_, self.covariances_ = (
            _estimate_ard_parameters(X, self.weights_, self.reg_weights_, resp,
                                     self.reg_covar, self.covariance_type))
        self.weights_ /= n_samples
        self.precisions_cholesky_ = _compute_precision_cholesky(
            self.covariances_, self.covariance_type)
Example No. 28
def read_pred():
    means = readFloat('%s-mixture_distribution_means.float3' %
                      predition_path)  # shape (4, 2)
    sigmas = readFloat('%s-mixture_distribution_sigmas.float3' %
                       predition_path)  # shape (4, 2)
    weights = readFloat('%s-mixture_distribution_weights.float3' %
                        predition_path)  # shape (4)

    sigmas = 2 * sigmas * sigmas

    gmm = GaussianMixture(n_components=4, covariance_type='diag')

    precisions_cholesky = _compute_precision_cholesky(sigmas, 'diag')
    gmm.weights_ = weights
    gmm.means_ = means
    gmm.precisions_cholesky_ = precisions_cholesky
    gmm.covariances_ = sigmas

    return gmm
Example No. 29
    def test_hand_computation_of_log_prob_vs_sklearn(self):
        """ Something seems wrong with my mahadist computation. Before digging
        further into the C library to find the error, I want to make sure that
        the results I think it should give are right. One way to gather
        evidence in favor of this conclusion is to use the result in the
        computation of the log probability (this is what led me here in the
        first place). This test does so, and consequently doesn't actually
        test any of the code in gmm.py. For this to work the mask must be
        entirely shared. """
        cov_type = 'spherical'
        rs = np.random.RandomState(10)
        gmm = GaussianMixture(n_components=3,
                              num_feat_full=5,
                              num_feat_comp=3,
                              num_feat_shared=3,
                              num_samp=4,
                              transform=None,
                              mask=None,
                              D_indices=None,
                              covariance_type=cov_type,
                              random_state=rs)
        gmm.fit_sparsifier(X=self.td.X)
        means = rs.rand(gmm.n_components, gmm.num_feat_full)
        covariances = rs.rand(gmm.n_components)
        precisions = _compute_precision_cholesky(covariances, cov_type)

        # this is where we need the mask to be shared, so that all mask rows
        # equal mask[0]
        masked_means = means[:, gmm.mask[0]]
        log_prob_true = _estimate_log_gaussian_prob(gmm.RHDX, masked_means,
                                                    precisions, cov_type)

        log_prob_test = np.zeros((gmm.num_samp, gmm.n_components))
        for data_ind in range(gmm.num_samp):
            for comp_ind in range(gmm.n_components):
                test_const = gmm.num_feat_comp * np.log(2 * np.pi)
                test_logdet = gmm.num_feat_comp * np.log(covariances[comp_ind])
                test_mahadist = 1/covariances[comp_ind] * \
                    np.linalg.norm(gmm.RHDX[data_ind] -
                        means[comp_ind][gmm.mask[data_ind]])**2
                log_prob_test[data_ind, comp_ind] = -.5*(test_const + \
                    test_logdet + test_mahadist)
        self.assertArrayEqual(log_prob_test, log_prob_true)
Example No. 30
def test_compute_log_det_cholesky():
    n_features = 2
    rand_data = RandomData(np.random.RandomState(0))

    for covar_type in COVARIANCE_TYPE:
        covariance = rand_data.covariances[covar_type]

        if covar_type == 'full':
            predicted_det = np.array([linalg.det(cov) for cov in covariance])
        elif covar_type == 'tied':
            predicted_det = linalg.det(covariance)
        elif covar_type == 'diag':
            predicted_det = np.array([np.prod(cov) for cov in covariance])
        elif covar_type == 'spherical':
            predicted_det = covariance ** n_features

        # We compute the cholesky decomposition of the covariance matrix
        expected_det = _compute_log_det_cholesky(_compute_precision_cholesky(
            covariance, covar_type), covar_type, n_features=n_features)
        assert_array_almost_equal(expected_det, - .5 * np.log(predicted_det))
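The identity behind this test (stated here for reference, not part of the original code): if $\Sigma = L L^\top$, the precision Cholesky factor is $L^{-\top}$ and

$$\log \det L^{-\top} = -\tfrac{1}{2} \log \det \Sigma,$$

which is why expected_det is compared against $-\tfrac{1}{2}\log$ of the determinants computed per covariance type above.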
Example No. 31
    def _m_step(self, X, log_resp):
        """M step.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
        log_resp : array-like, shape (n_samples, n_components)
            Logarithm of the posterior probabilities (or responsibilities) of
            the point of each sample in X.
        """
        n_samples, _ = X.shape

        self.y_sub = self._estimate_subspace_repr(X, lr=log_resp)
        self.weights_, self.means_, self.covariances_ = (
            _estimate_gaussian_parameters(self.y_sub, np.exp(log_resp),
                                          self.reg_covar,
                                          self.covariance_type))
        self.weights_ /= n_samples
        self.precisions_cholesky_ = _compute_precision_cholesky(
            self.covariances_, self.covariance_type)
        self.noise = self._estimate_noise(X)
Example No. 32
    def _Gmm_setup(self, T, Y, maxIter):
        first_theta = self.compute_next_theta(T, Y)  # theta from rnk
        jGMM_params = self.GLLiM_to_GGM(*first_theta)
        precisions_chol = _compute_precision_cholesky(jGMM_params["V"], "full")
        precisions = np.matmul(precisions_chol,
                               precisions_chol.transpose((0, 2, 1)))
        TY = np.concatenate((T, Y), axis=1)

        verbose = {None: -1, False: 0, True: 1}[self.verbose]
        self.Gmm = MyGMM(n_components=self.K,
                         n_init=1,
                         max_iter=maxIter,
                         reg_covar=self.reg_covar,
                         tol=self.stopping_ratio,
                         weights_init=jGMM_params["rho"],
                         means_init=jGMM_params["m"],
                         precisions_init=precisions,
                         verbose=verbose,
                         track=self.track_theta)
        return TY, self.Gmm
Example No. 33
    def load_mixture_model(data, use_mgrd=True):
        if use_mgrd:
            mm = ExtendedMGRDMixtureModel.load_from_json({'covars': data['gmm_covars'],
                                                          'means': data['gmm_means'],
                                                          'weights': data['gmm_weights']})
        else:
            n_components = len(np.array(data['gmm_weights']))
            mm = GaussianMixture(n_components=n_components, covariance_type='full')
            mm.weights_ = np.array(data['gmm_weights'])
            mm.means_ = np.array(data['gmm_means'])
            mm.converged_ = True
            mm.covariances_ = np.array(data['gmm_covars'])
            mm.precisions_cholesky_ = _compute_precision_cholesky(mm.covariances_, covariance_type='full')
            mm.n_dims = len(mm.means_[0])
            write_message_to_log("Initialize scipy GMM", LOG_MODE_DEBUG)
        return mm
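Example No. 35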
def test_gaussian_suffstat_sk_spherical():
    # the spherical covariance equals the variance of the one-dimensional
    # data obtained by flattening, n_components=1
    rng = np.random.RandomState(0)
    n_samples, n_features = 500, 2

    X = rng.rand(n_samples, n_features)
    X = X - X.mean()
    resp = np.ones((n_samples, 1))
    nk = np.array([n_samples])
    xk = X.mean()
    covars_pred_spherical = _estimate_gaussian_covariances_spherical(resp, X,
                                                                     nk, xk, 0)
    covars_pred_spherical2 = (np.dot(X.flatten().T, X.flatten()) /
                              (n_features * n_samples))
    assert_almost_equal(covars_pred_spherical, covars_pred_spherical2)

    # check the precision computation
    precs_chol_pred = _compute_precision_cholesky(covars_pred_spherical,
                                                  'spherical')
    assert_almost_equal(covars_pred_spherical, 1. / precs_chol_pred ** 2)
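Example No. 36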
def test_suffstat_sk_diag():
    # test against 'full' case
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    resp = rng.rand(n_samples, n_components)
    resp = resp / resp.sum(axis=1)[:, np.newaxis]
    X = rng.rand(n_samples, n_features)
    nk = resp.sum(axis=0)
    xk = np.dot(resp.T, X) / nk[:, np.newaxis]
    covars_pred_full = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0)
    covars_pred_diag = _estimate_gaussian_covariances_diag(resp, X, nk, xk, 0)

    ecov = EmpiricalCovariance()
    for (cov_full, cov_diag) in zip(covars_pred_full, covars_pred_diag):
        ecov.covariance_ = np.diag(np.diag(cov_full))
        cov_diag = np.diag(cov_diag)
        assert_almost_equal(ecov.error_norm(cov_diag, norm='frobenius'), 0)
        assert_almost_equal(ecov.error_norm(cov_diag, norm='spectral'), 0)

    # check the precision computation
    precs_chol_pred = _compute_precision_cholesky(covars_pred_diag, 'diag')
    assert_almost_equal(covars_pred_diag, 1. / precs_chol_pred ** 2)
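Finally, a standalone sanity check (plain NumPy, independent of sklearn) of the scalar identity the 'diag' and 'spherical' assertions above rely on: for these covariance types sklearn stores variances, so the precision "Cholesky factor" reduces elementwise to 1/sqrt(variance).

import numpy as np

variances = np.array([0.5, 2.0, 4.0])
chol = 1.0 / np.sqrt(variances)   # what _compute_precision_cholesky returns elementwise
assert np.allclose(variances, 1.0 / chol**2)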