Пример #1
0
def test_gaussian_mixture_fit_predict_n_init():
    # Check that fit_predict is equivalent to fit.predict, when n_init > 1
    X = np.random.RandomState(0).randn(1000, 5)
    gm = GaussianMixture(n_components=5, n_init=5, random_state=0)
    y_pred1 = gm.fit_predict(X)
    y_pred2 = gm.predict(X)
    assert_array_equal(y_pred1, y_pred2)
Пример #2
0
def test_regularisation():
    # We train the GaussianMixture on degenerate data by defining two clusters
    # of a 0 covariance.
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 5

    X = np.vstack((np.ones(
        (n_samples // 2, n_features)), np.zeros((n_samples // 2, n_features))))

    for covar_type in COVARIANCE_TYPE:
        gmm = GaussianMixture(n_components=n_samples,
                              reg_covar=0,
                              covariance_type=covar_type,
                              random_state=rng)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            assert_raise_message(
                ValueError, "Fitting the mixture model failed because "
                "some components have ill-defined empirical "
                "covariance (for instance caused by "
                "singleton or collapsed samples). Try to "
                "decrease the number of components, or "
                "increase reg_covar.", gmm.fit, X)

            gmm.set_params(reg_covar=1e-6).fit(X)
Пример #3
0
def test_gaussian_mixture_estimate_log_prob_resp():
    # test whether responsibilities are normalized
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=5)
    n_samples = rand_data.n_samples
    n_features = rand_data.n_features
    n_components = rand_data.n_components

    X = rng.rand(n_samples, n_features)
    for covar_type in COVARIANCE_TYPE:
        weights = rand_data.weights
        means = rand_data.means
        precisions = rand_data.precisions[covar_type]
        g = GaussianMixture(n_components=n_components,
                            random_state=rng,
                            weights_init=weights,
                            means_init=means,
                            precisions_init=precisions,
                            covariance_type=covar_type)
        g.fit(X)
        resp = g.predict_proba(X)
        assert_array_almost_equal(resp.sum(axis=1), np.ones(n_samples))
        assert_array_equal(g.weights_init, weights)
        assert_array_equal(g.means_init, means)
        assert_array_equal(g.precisions_init, precisions)
Пример #4
0
def test_score():
    covar_type = 'full'
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components = rand_data.n_components
    X = rand_data.X[covar_type]

    # Check the error message if we don't call fit
    gmm1 = GaussianMixture(n_components=n_components,
                           n_init=1,
                           max_iter=1,
                           reg_covar=0,
                           random_state=rng,
                           covariance_type=covar_type)
    assert_raise_message(
        NotFittedError, "This GaussianMixture instance is not fitted "
        "yet. Call 'fit' with appropriate arguments "
        "before using this estimator.", gmm1.score, X)

    # Check score value
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ConvergenceWarning)
        gmm1.fit(X)
    gmm_score = gmm1.score(X)
    gmm_score_proba = gmm1.score_samples(X).mean()
    assert_almost_equal(gmm_score, gmm_score_proba)

    # Check if the score increase
    gmm2 = GaussianMixture(n_components=n_components,
                           n_init=1,
                           reg_covar=0,
                           random_state=rng,
                           covariance_type=covar_type).fit(X)
    assert gmm2.score(X) > gmm1.score(X)
Пример #5
0
def test_monotonic_likelihood():
    # We check that each step of the EM without regularization improve
    # monotonically the training set likelihood
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components = rand_data.n_components

    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        gmm = GaussianMixture(n_components=n_components,
                              covariance_type=covar_type,
                              reg_covar=0,
                              warm_start=True,
                              max_iter=1,
                              random_state=rng,
                              tol=1e-7)
        current_log_likelihood = -np.infty
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ConvergenceWarning)
            # Do one training iteration at a time so we can make sure that the
            # training log likelihood increases after each iteration.
            for _ in range(600):
                prev_log_likelihood = current_log_likelihood
                try:
                    current_log_likelihood = gmm.fit(X).score(X)
                except ConvergenceWarning:
                    pass
                assert (current_log_likelihood >= prev_log_likelihood)

                if gmm.converged_:
                    break

            assert gmm.converged_
Пример #6
0
def test_gaussian_mixture_fit():
    # recover the ground truth
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_features = rand_data.n_features
    n_components = rand_data.n_components

    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        g = GaussianMixture(n_components=n_components,
                            n_init=20,
                            reg_covar=0,
                            random_state=rng,
                            covariance_type=covar_type)
        g.fit(X)

        # needs more data to pass the test with rtol=1e-7
        assert_allclose(np.sort(g.weights_),
                        np.sort(rand_data.weights),
                        rtol=0.1,
                        atol=1e-2)

        arg_idx1 = g.means_[:, 0].argsort()
        arg_idx2 = rand_data.means[:, 0].argsort()
        assert_allclose(g.means_[arg_idx1],
                        rand_data.means[arg_idx2],
                        rtol=0.1,
                        atol=1e-2)

        if covar_type == 'full':
            prec_pred = g.precisions_
            prec_test = rand_data.precisions['full']
        elif covar_type == 'tied':
            prec_pred = np.array([g.precisions_] * n_components)
            prec_test = np.array([rand_data.precisions['tied']] * n_components)
        elif covar_type == 'spherical':
            prec_pred = np.array(
                [np.eye(n_features) * c for c in g.precisions_])
            prec_test = np.array([
                np.eye(n_features) * c
                for c in rand_data.precisions['spherical']
            ])
        elif covar_type == 'diag':
            prec_pred = np.array([np.diag(d) for d in g.precisions_])
            prec_test = np.array(
                [np.diag(d) for d in rand_data.precisions['diag']])

        arg_idx1 = np.trace(prec_pred, axis1=1, axis2=2).argsort()
        arg_idx2 = np.trace(prec_test, axis1=1, axis2=2).argsort()
        for k, h in zip(arg_idx1, arg_idx2):
            ecov = EmpiricalCovariance()
            ecov.covariance_ = prec_test[h]
            # the accuracy depends on the number of data and randomness, rng
            assert_allclose(ecov.error_norm(prec_pred[k]), 0, atol=0.15)
Пример #7
0
def test_gaussian_mixture_n_parameters():
    # Test that the right number of parameters is estimated
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 50, 5, 2
    X = rng.randn(n_samples, n_features)
    n_params = {'spherical': 13, 'diag': 21, 'tied': 26, 'full': 41}
    for cv_type in COVARIANCE_TYPE:
        g = GaussianMixture(n_components=n_components,
                            covariance_type=cv_type,
                            random_state=rng).fit(X)
        assert g._n_parameters() == n_params[cv_type]
Пример #8
0
def test_bic_1d_1component():
    # Test all of the covariance_types return the same BIC score for
    # 1-dimensional, 1 component fits.
    rng = np.random.RandomState(0)
    n_samples, n_dim, n_components = 100, 1, 1
    X = rng.randn(n_samples, n_dim)
    bic_full = GaussianMixture(n_components=n_components,
                               covariance_type='full',
                               random_state=rng).fit(X).bic(X)
    for covariance_type in ['tied', 'diag', 'spherical']:
        bic = GaussianMixture(n_components=n_components,
                              covariance_type=covariance_type,
                              random_state=rng).fit(X).bic(X)
        assert_almost_equal(bic_full, bic)
Пример #9
0
def test_multiple_init():
    # Test that multiple inits does not much worse than a single one
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 50, 5, 2
    X = rng.randn(n_samples, n_features)
    for cv_type in COVARIANCE_TYPE:
        train1 = GaussianMixture(n_components=n_components,
                                 covariance_type=cv_type,
                                 random_state=0).fit(X).score(X)
        train2 = GaussianMixture(n_components=n_components,
                                 covariance_type=cv_type,
                                 random_state=0,
                                 n_init=5).fit(X).score(X)
        assert train2 >= train1
Пример #10
0
def test_gaussian_mixture_verbose():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_components = rand_data.n_components
    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        g = GaussianMixture(n_components=n_components,
                            n_init=1,
                            reg_covar=0,
                            random_state=rng,
                            covariance_type=covar_type,
                            verbose=1)
        h = GaussianMixture(n_components=n_components,
                            n_init=1,
                            reg_covar=0,
                            random_state=rng,
                            covariance_type=covar_type,
                            verbose=2)
        old_stdout = sys.stdout
        sys.stdout = StringIO()
        try:
            g.fit(X)
            h.fit(X)
        finally:
            sys.stdout = old_stdout
Пример #11
0
def test_convergence_detected_with_warm_start():
    # We check that convergence is detected when warm_start=True
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_components = rand_data.n_components
    X = rand_data.X['full']

    for max_iter in (1, 2, 50):
        gmm = GaussianMixture(n_components=n_components,
                              warm_start=True,
                              max_iter=max_iter,
                              random_state=rng)
        for _ in range(100):
            gmm.fit(X)
            if gmm.converged_:
                break
        assert gmm.converged_
        assert max_iter >= gmm.n_iter_
Пример #12
0
def test_init():
    # We check that by increasing the n_init number we have a better solution
    for random_state in range(15):
        rand_data = RandomData(np.random.RandomState(random_state),
                               n_samples=50,
                               scale=1)
        n_components = rand_data.n_components
        X = rand_data.X['full']

        gmm1 = GaussianMixture(n_components=n_components,
                               n_init=1,
                               max_iter=1,
                               random_state=random_state).fit(X)
        gmm2 = GaussianMixture(n_components=n_components,
                               n_init=10,
                               max_iter=1,
                               random_state=random_state).fit(X)

        assert gmm2.lower_bound_ >= gmm1.lower_bound_
Пример #13
0
def test_score_samples():
    covar_type = 'full'
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components = rand_data.n_components
    X = rand_data.X[covar_type]

    # Check the error message if we don't call fit
    gmm = GaussianMixture(n_components=n_components,
                          n_init=1,
                          reg_covar=0,
                          random_state=rng,
                          covariance_type=covar_type)
    assert_raise_message(
        NotFittedError, "This GaussianMixture instance is not fitted "
        "yet. Call 'fit' with appropriate arguments "
        "before using this estimator.", gmm.score_samples, X)

    gmm_score_samples = gmm.fit(X).score_samples(X)
    assert gmm_score_samples.shape[0] == rand_data.n_samples
Пример #14
0
def test_gaussian_mixture_fit_predict(seed, max_iter, tol):
    rng = np.random.RandomState(seed)
    rand_data = RandomData(rng)
    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        Y = rand_data.Y
        g = GaussianMixture(n_components=rand_data.n_components,
                            random_state=rng,
                            weights_init=rand_data.weights,
                            means_init=rand_data.means,
                            precisions_init=rand_data.precisions[covar_type],
                            covariance_type=covar_type,
                            max_iter=max_iter,
                            tol=tol)

        # check if fit_predict(X) is equivalent to fit(X).predict(X)
        f = copy.deepcopy(g)
        Y_pred1 = f.fit(X).predict(X)
        Y_pred2 = g.fit_predict(X)
        assert_array_equal(Y_pred1, Y_pred2)
        assert adjusted_rand_score(Y, Y_pred2) > .95
Пример #15
0
def test_property():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components = rand_data.n_components

    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        gmm = GaussianMixture(n_components=n_components,
                              covariance_type=covar_type,
                              random_state=rng,
                              n_init=5)
        gmm.fit(X)
        if covar_type == 'full':
            for prec, covar in zip(gmm.precisions_, gmm.covariances_):

                assert_array_almost_equal(linalg.inv(prec), covar)
        elif covar_type == 'tied':
            assert_array_almost_equal(linalg.inv(gmm.precisions_),
                                      gmm.covariances_)
        else:
            assert_array_almost_equal(gmm.precisions_, 1. / gmm.covariances_)
Пример #16
0
def test_check_precisions():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)

    n_components, n_features = rand_data.n_components, rand_data.n_features

    # Define the bad precisions for each covariance_type
    precisions_bad_shape = {
        'full': np.ones((n_components + 1, n_features, n_features)),
        'tied': np.ones((n_features + 1, n_features + 1)),
        'diag': np.ones((n_components + 1, n_features)),
        'spherical': np.ones((n_components + 1))
    }

    # Define not positive-definite precisions
    precisions_not_pos = np.ones((n_components, n_features, n_features))
    precisions_not_pos[0] = np.eye(n_features)
    precisions_not_pos[0, 0, 0] = -1.

    precisions_not_positive = {
        'full': precisions_not_pos,
        'tied': precisions_not_pos[0],
        'diag': np.full((n_components, n_features), -1.),
        'spherical': np.full(n_components, -1.)
    }

    not_positive_errors = {
        'full': 'symmetric, positive-definite',
        'tied': 'symmetric, positive-definite',
        'diag': 'positive',
        'spherical': 'positive'
    }

    for covar_type in COVARIANCE_TYPE:
        X = RandomData(rng).X[covar_type]
        g = GaussianMixture(n_components=n_components,
                            covariance_type=covar_type,
                            random_state=rng)

        # Check precisions with bad shapes
        g.precisions_init = precisions_bad_shape[covar_type]
        assert_raise_message(
            ValueError, "The parameter '%s precision' should have "
            "the shape of" % covar_type, g.fit, X)

        # Check not positive precisions
        g.precisions_init = precisions_not_positive[covar_type]
        assert_raise_message(
            ValueError, "'%s precision' should be %s" %
            (covar_type, not_positive_errors[covar_type]), g.fit, X)

        # Check the correct init of precisions_init
        g.precisions_init = rand_data.precisions[covar_type]
        g.fit(X)
        assert_array_equal(rand_data.precisions[covar_type], g.precisions_init)
Пример #17
0
def test_gaussian_mixture_fit_best_params():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_components = rand_data.n_components
    n_init = 10
    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        g = GaussianMixture(n_components=n_components,
                            n_init=1,
                            reg_covar=0,
                            random_state=rng,
                            covariance_type=covar_type)
        ll = []
        for _ in range(n_init):
            g.fit(X)
            ll.append(g.score(X))
        ll = np.array(ll)
        g_best = GaussianMixture(n_components=n_components,
                                 n_init=n_init,
                                 reg_covar=0,
                                 random_state=rng,
                                 covariance_type=covar_type)
        g_best.fit(X)
        assert_almost_equal(ll.min(), g_best.score(X))
Пример #18
0
def test_sample():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7, n_components=3)
    n_features, n_components = rand_data.n_features, rand_data.n_components

    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]

        gmm = GaussianMixture(n_components=n_components,
                              covariance_type=covar_type,
                              random_state=rng)
        # To sample we need that GaussianMixture is fitted
        assert_raise_message(NotFittedError, "This GaussianMixture instance "
                             "is not fitted", gmm.sample, 0)
        gmm.fit(X)

        assert_raise_message(ValueError, "Invalid value for 'n_samples",
                             gmm.sample, 0)

        # Just to make sure the class samples correctly
        n_samples = 20000
        X_s, y_s = gmm.sample(n_samples)

        for k in range(n_components):
            if covar_type == 'full':
                assert_array_almost_equal(gmm.covariances_[k],
                                          np.cov(X_s[y_s == k].T),
                                          decimal=1)
            elif covar_type == 'tied':
                assert_array_almost_equal(gmm.covariances_,
                                          np.cov(X_s[y_s == k].T),
                                          decimal=1)
            elif covar_type == 'diag':
                assert_array_almost_equal(gmm.covariances_[k],
                                          np.diag(np.cov(X_s[y_s == k].T)),
                                          decimal=1)
            else:
                assert_array_almost_equal(gmm.covariances_[k],
                                          np.var(X_s[y_s == k] -
                                                 gmm.means_[k]),
                                          decimal=1)

        means_s = np.array(
            [np.mean(X_s[y_s == k], 0) for k in range(n_components)])
        assert_array_almost_equal(gmm.means_, means_s, decimal=1)

        # Check shapes of sampled data, see
        # https://github.com/scikit-learn/scikit-learn/issues/7701
        assert X_s.shape == (n_samples, n_features)

        for sample_size in range(1, 100):
            X_s, _ = gmm.sample(sample_size)
            assert X_s.shape == (sample_size, n_features)
Пример #19
0
def test_gaussian_mixture_aic_bic():
    # Test the aic and bic criteria
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 50, 3, 2
    X = rng.randn(n_samples, n_features)
    # standard gaussian entropy
    sgh = 0.5 * (fast_logdet(np.cov(X.T, bias=1)) + n_features *
                 (1 + np.log(2 * np.pi)))
    for cv_type in COVARIANCE_TYPE:
        g = GaussianMixture(n_components=n_components,
                            covariance_type=cv_type,
                            random_state=rng,
                            max_iter=200)
        g.fit(X)
        aic = 2 * n_samples * sgh + 2 * g._n_parameters()
        bic = (2 * n_samples * sgh + np.log(n_samples) * g._n_parameters())
        bound = n_features / np.sqrt(n_samples)
        assert (g.aic(X) - aic) / n_samples < bound
        assert (g.bic(X) - bic) / n_samples < bound
Пример #20
0
def test_check_weights():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)

    n_components = rand_data.n_components
    X = rand_data.X['full']

    g = GaussianMixture(n_components=n_components)

    # Check bad shape
    weights_bad_shape = rng.rand(n_components, 1)
    g.weights_init = weights_bad_shape
    assert_raise_message(
        ValueError, "The parameter 'weights' should have the shape of "
        "(%d,), but got %s" % (n_components, str(weights_bad_shape.shape)),
        g.fit, X)

    # Check bad range
    weights_bad_range = rng.rand(n_components) + 1
    g.weights_init = weights_bad_range
    assert_raise_message(
        ValueError, "The parameter 'weights' should be in the range "
        "[0, 1], but got max value %.5f, min value %.5f" %
        (np.min(weights_bad_range), np.max(weights_bad_range)), g.fit, X)

    # Check bad normalization
    weights_bad_norm = rng.rand(n_components)
    weights_bad_norm = weights_bad_norm / (weights_bad_norm.sum() + 1)
    g.weights_init = weights_bad_norm
    assert_raise_message(
        ValueError, "The parameter 'weights' should be normalized, "
        "but got sum(weights) = %.5f" % np.sum(weights_bad_norm), g.fit, X)

    # Check good weights matrix
    weights = rand_data.weights
    g = GaussianMixture(weights_init=weights, n_components=n_components)
    g.fit(X)
    assert_array_equal(weights, g.weights_init)
Пример #21
0
def test_gaussian_mixture_fit_convergence_warning():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=1)
    n_components = rand_data.n_components
    max_iter = 1
    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        g = GaussianMixture(n_components=n_components,
                            n_init=1,
                            max_iter=max_iter,
                            reg_covar=0,
                            random_state=rng,
                            covariance_type=covar_type)
        assert_warns_message(
            ConvergenceWarning, 'Initialization %d did not converge. '
            'Try different init parameters, '
            'or increase max_iter, tol '
            'or check for degenerate data.' % max_iter, g.fit, X)
Пример #22
0
def test_check_means():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)

    n_components, n_features = rand_data.n_components, rand_data.n_features
    X = rand_data.X['full']

    g = GaussianMixture(n_components=n_components)

    # Check means bad shape
    means_bad_shape = rng.rand(n_components + 1, n_features)
    g.means_init = means_bad_shape
    assert_raise_message(ValueError,
                         "The parameter 'means' should have the shape of ",
                         g.fit, X)

    # Check good means matrix
    means = rand_data.means
    g.means_init = means
    g.fit(X)
    assert_array_equal(means, g.means_init)
Пример #23
0
def test_gaussian_mixture_predict_predict_proba():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        Y = rand_data.Y
        g = GaussianMixture(n_components=rand_data.n_components,
                            random_state=rng,
                            weights_init=rand_data.weights,
                            means_init=rand_data.means,
                            precisions_init=rand_data.precisions[covar_type],
                            covariance_type=covar_type)

        # Check a warning message arrive if we don't do fit
        assert_raise_message(
            NotFittedError, "This GaussianMixture instance is not fitted "
            "yet. Call 'fit' with appropriate arguments "
            "before using this estimator.", g.predict, X)

        g.fit(X)
        Y_pred = g.predict(X)
        Y_pred_proba = g.predict_proba(X).argmax(axis=1)
        assert_array_equal(Y_pred, Y_pred_proba)
        assert adjusted_rand_score(Y, Y_pred) > .95
Пример #24
0
# Author: Guillaume Lemaitre <*****@*****.**>
# License: BSD 3 clause

import pytest
import numpy as np

from sklearn_lib.mixture import GaussianMixture
from sklearn_lib.mixture import BayesianGaussianMixture


@pytest.mark.parametrize(
    "estimator",
    [GaussianMixture(), BayesianGaussianMixture()])
def test_gaussian_mixture_n_iter(estimator):
    # check that n_iter is the number of iteration performed.
    rng = np.random.RandomState(0)
    X = rng.rand(10, 5)
    max_iter = 1
    estimator.set_params(max_iter=max_iter)
    estimator.fit(X)
    assert estimator.n_iter_ == max_iter
Пример #25
0
def test_gaussian_mixture_attributes():
    # test bad parameters
    rng = np.random.RandomState(0)
    X = rng.rand(10, 2)

    n_components_bad = 0
    gmm = GaussianMixture(n_components=n_components_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'n_components': %d "
        "Estimation requires at least one component" % n_components_bad,
        gmm.fit, X)

    # covariance_type should be in [spherical, diag, tied, full]
    covariance_type_bad = 'bad_covariance_type'
    gmm = GaussianMixture(covariance_type=covariance_type_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'covariance_type': %s "
        "'covariance_type' should be in "
        "['spherical', 'tied', 'diag', 'full']" % covariance_type_bad, gmm.fit,
        X)

    tol_bad = -1
    gmm = GaussianMixture(tol=tol_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'tol': %.5f "
        "Tolerance used by the EM must be non-negative" % tol_bad, gmm.fit, X)

    reg_covar_bad = -1
    gmm = GaussianMixture(reg_covar=reg_covar_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'reg_covar': %.5f "
        "regularization on covariance must be "
        "non-negative" % reg_covar_bad, gmm.fit, X)

    max_iter_bad = 0
    gmm = GaussianMixture(max_iter=max_iter_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'max_iter': %d "
        "Estimation requires at least one iteration" % max_iter_bad, gmm.fit,
        X)

    n_init_bad = 0
    gmm = GaussianMixture(n_init=n_init_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'n_init': %d "
        "Estimation requires at least one run" % n_init_bad, gmm.fit, X)

    init_params_bad = 'bad_method'
    gmm = GaussianMixture(init_params=init_params_bad)
    assert_raise_message(
        ValueError,
        "Unimplemented initialization method '%s'" % init_params_bad, gmm.fit,
        X)

    # test good parameters
    n_components, tol, n_init, max_iter, reg_covar = 2, 1e-4, 3, 30, 1e-1
    covariance_type, init_params = 'full', 'random'
    gmm = GaussianMixture(n_components=n_components,
                          tol=tol,
                          n_init=n_init,
                          max_iter=max_iter,
                          reg_covar=reg_covar,
                          covariance_type=covariance_type,
                          init_params=init_params).fit(X)

    assert gmm.n_components == n_components
    assert gmm.covariance_type == covariance_type
    assert gmm.tol == tol
    assert gmm.reg_covar == reg_covar
    assert gmm.max_iter == max_iter
    assert gmm.n_init == n_init
    assert gmm.init_params == init_params
Пример #26
0
def test_warm_start(seed):
    random_state = seed
    rng = np.random.RandomState(random_state)
    n_samples, n_features, n_components = 500, 2, 2
    X = rng.rand(n_samples, n_features)

    # Assert the warm_start give the same result for the same number of iter
    g = GaussianMixture(n_components=n_components,
                        n_init=1,
                        max_iter=2,
                        reg_covar=0,
                        random_state=random_state,
                        warm_start=False)
    h = GaussianMixture(n_components=n_components,
                        n_init=1,
                        max_iter=1,
                        reg_covar=0,
                        random_state=random_state,
                        warm_start=True)

    g.fit(X)
    score1 = h.fit(X).score(X)
    score2 = h.fit(X).score(X)

    assert_almost_equal(g.weights_, h.weights_)
    assert_almost_equal(g.means_, h.means_)
    assert_almost_equal(g.precisions_, h.precisions_)
    assert score2 > score1

    # Assert that by using warm_start we can converge to a good solution
    g = GaussianMixture(n_components=n_components,
                        n_init=1,
                        max_iter=5,
                        reg_covar=0,
                        random_state=random_state,
                        warm_start=False,
                        tol=1e-6)
    h = GaussianMixture(n_components=n_components,
                        n_init=1,
                        max_iter=5,
                        reg_covar=0,
                        random_state=random_state,
                        warm_start=True,
                        tol=1e-6)

    g.fit(X)
    assert not g.converged_

    h.fit(X)
    # depending on the data there is large variability in the number of
    # refit necessary to converge due to the complete randomness of the
    # data
    for _ in range(1000):
        h.fit(X)
        if h.converged_:
            break
    assert h.converged_