Пример #1
0
def test_suffstat_sk_full():
    """Compare the 'full' precision estimate with ``EmpiricalCovariance``.

    With a single component, the covariance is rebuilt as ``inv(P . P^T)``
    from the first entry returned by
    ``_estimate_gaussian_precisions_cholesky_full`` and measured against an
    ``EmpiricalCovariance`` fit.  Both the Frobenius and the spectral error
    norms are expected to be almost zero.
    """
    rng = np.random.RandomState(0)
    n_samples, n_features = 500, 2
    norm_names = ('frobenius', 'spectral')

    # Case 1: the data is taken as already "centered" (all-zero mean) and
    # the responsibilities are random; the reference estimator is fitted
    # on X scaled by sqrt(resp).
    X = rng.rand(n_samples, n_features)
    weights = rng.rand(n_samples, 1)
    scaled_X = np.sqrt(weights) * X
    counts = np.array([n_samples])
    means = np.zeros((1, n_features))
    factor = _estimate_gaussian_precisions_cholesky_full(
        weights, X, counts, means, 0)[0]
    cov_estimate = linalg.inv(np.dot(factor, factor.T))
    reference = EmpiricalCovariance(assume_centered=True)
    reference.fit(scaled_X)
    for norm_name in norm_names:
        assert_almost_equal(
            reference.error_norm(cov_estimate, norm=norm_name), 0)

    # Case 2: every responsibility equals one and the mean is the plain
    # column mean of X; the reference estimator is fitted on X itself.
    weights = np.ones((n_samples, 1))
    counts = np.array([n_samples])
    means = X.mean(axis=0)[np.newaxis, :]
    factor = _estimate_gaussian_precisions_cholesky_full(
        weights, X, counts, means, 0)[0]
    cov_estimate = linalg.inv(np.dot(factor, factor.T))
    reference = EmpiricalCovariance(assume_centered=False)
    reference.fit(X)
    for norm_name in norm_names:
        assert_almost_equal(
            reference.error_norm(cov_estimate, norm=norm_name), 0)
def test_suffstat_sk_full():
    """Validate the single-component 'full' estimate in two special cases.

    The covariance implied by the output of
    ``_estimate_gaussian_precisions_cholesky_full`` is checked against the
    covariance fitted by ``EmpiricalCovariance``, using the Frobenius and
    the spectral error norms (both expected to be almost zero).
    """
    def implied_covariance(resp, data, nk, xk):
        # Rebuild the covariance by inverting P . P^T for the first (and
        # only) component returned by the helper.
        chol = _estimate_gaussian_precisions_cholesky_full(
            resp, data, nk, xk, 0)[0]
        return linalg.inv(np.dot(chol, chol.T))

    def assert_same_covariance(estimator, covariance):
        # Frobenius first, then spectral.
        for norm in ('frobenius', 'spectral'):
            assert_almost_equal(
                estimator.error_norm(covariance, norm=norm), 0)

    rng = np.random.RandomState(0)
    n_samples, n_features = 500, 2

    # Special case 1: data assumed "centered", random responsibilities.
    X = rng.rand(n_samples, n_features)
    resp = rng.rand(n_samples, 1)
    X_weighted = np.sqrt(resp) * X
    covariance = implied_covariance(
        resp, X, np.array([n_samples]), np.zeros((1, n_features)))
    centered_fit = EmpiricalCovariance(assume_centered=True)
    centered_fit.fit(X_weighted)
    assert_same_covariance(centered_fit, covariance)

    # Special case 2: all responsibilities are one, mean taken from X.
    resp = np.ones((n_samples, 1))
    column_means = X.mean(axis=0).reshape((1, -1))
    covariance = implied_covariance(
        resp, X, np.array([n_samples]), column_means)
    plain_fit = EmpiricalCovariance(assume_centered=False)
    plain_fit.fit(X)
    assert_same_covariance(plain_fit, covariance)
Пример #3
0
def test_suffstat_sk_diag():
    """Check the 'diag' precision estimate against the 'full' one.

    For each component, the covariance rebuilt from the output of
    ``_estimate_gaussian_precisions_cholesky_diag`` must match the diagonal
    part of the covariance rebuilt from the output of
    ``_estimate_gaussian_precisions_cholesky_full``, in both the Frobenius
    and the spectral error norm.
    """
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    # Random responsibilities, normalised so that each row sums to one.
    raw_resp = rng.rand(n_samples, n_components)
    resp = raw_resp / raw_resp.sum(axis=1)[:, np.newaxis]
    X = rng.rand(n_samples, n_features)
    # Per-component total weight and responsibility-weighted mean.
    nk = resp.sum(axis=0)
    xk = np.dot(resp.T, X) / nk[:, np.newaxis]

    full_factors = _estimate_gaussian_precisions_cholesky_full(
        resp, X, nk, xk, 0)
    full_covs = [linalg.inv(np.dot(factor, factor.T))
                 for factor in full_factors]

    diag_factors = _estimate_gaussian_precisions_cholesky_diag(
        resp, X, nk, xk, 0)
    # Squaring the diagonal matrix of reciprocals yields the per-feature
    # variances on the diagonal (off-diagonal zeros stay zero).
    diag_covs = np.array([np.diag(1. / entries) ** 2
                          for entries in diag_factors])

    reference = EmpiricalCovariance()
    for full_cov, diag_cov in zip(full_covs, diag_covs):
        # Keep only the diagonal of the 'full' covariance as the reference.
        reference.covariance_ = np.diag(np.diag(full_cov))
        for norm_name in ('frobenius', 'spectral'):
            assert_almost_equal(
                reference.error_norm(diag_cov, norm=norm_name), 0)
Пример #4
0
def test_suffstat_sk_tied():
    """Check the 'tied' precision estimate against the pooled 'full' one.

    Relies on the identity ``sum_k(Nk * Sk) / N = S_tied``: the per-component
    covariances rebuilt from the 'full' helper are weighted by ``nk``,
    summed and divided by ``n_samples``, then compared (Frobenius and
    spectral error norms) with the covariance rebuilt from the 'tied'
    helper.
    """
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    # Random responsibilities, normalised so that each row sums to one.
    raw_resp = rng.rand(n_samples, n_components)
    resp = raw_resp / raw_resp.sum(axis=1)[:, np.newaxis]
    X = rng.rand(n_samples, n_features)
    # Per-component total weight and responsibility-weighted mean.
    nk = resp.sum(axis=0)
    xk = np.dot(resp.T, X) / nk[:, np.newaxis]

    full_factors = _estimate_gaussian_precisions_cholesky_full(
        resp, X, nk, xk, 0)
    component_covs = [linalg.inv(np.dot(factor, factor.T))
                      for factor in full_factors]
    # Weighted average of the component covariances (Nk * Sk / N).
    component_weights = nk[:, np.newaxis, np.newaxis]
    pooled_cov = np.sum(component_weights * component_covs, 0) / n_samples

    tied_factor = _estimate_gaussian_precisions_cholesky_tied(
        resp, X, nk, xk, 0)
    tied_cov = linalg.inv(np.dot(tied_factor, tied_factor.T))

    reference = EmpiricalCovariance()
    reference.covariance_ = pooled_cov
    for norm_name in ('frobenius', 'spectral'):
        assert_almost_equal(
            reference.error_norm(tied_cov, norm=norm_name), 0)
def test_suffstat_sk_diag():
    """Verify the 'diag' estimate equals the diagonal of the 'full' one.

    Both helpers receive the same responsibilities, data, component weights
    and means.  The covariances derived from their outputs are compared
    component by component with the Frobenius and the spectral error norm.
    """
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    # Draw responsibilities and rescale each row to sum to one.
    resp = rng.rand(n_samples, n_components)
    row_totals = resp.sum(axis=1).reshape(-1, 1)
    resp = resp / row_totals
    X = rng.rand(n_samples, n_features)
    nk = resp.sum(axis=0)
    weighted_sums = np.dot(resp.T, X)
    xk = weighted_sums / nk.reshape(-1, 1)

    chol_full = _estimate_gaussian_precisions_cholesky_full(
        resp, X, nk, xk, 0)
    covs_from_full = [
        linalg.inv(np.dot(chol, chol.T)) for chol in chol_full
    ]

    chol_diag = _estimate_gaussian_precisions_cholesky_diag(
        resp, X, nk, xk, 0)
    covs_from_diag = np.array(
        [np.diag(1. / values) ** 2 for values in chol_diag])

    checker = EmpiricalCovariance()
    norms = ('frobenius', 'spectral')
    for expected_full, actual_diag in zip(covs_from_full, covs_from_diag):
        # Only the diagonal of the 'full' covariance is comparable.
        checker.covariance_ = np.diag(np.diag(expected_full))
        for norm in norms:
            assert_almost_equal(
                checker.error_norm(actual_diag, norm=norm), 0)
def test_suffstat_sk_tied():
    """Verify the 'tied' estimate via ``Nk * Sk / N = S_tied``.

    The nk-weighted sum of the covariances derived from the 'full' helper,
    divided by ``n_samples``, is used as the reference covariance; the
    covariance derived from the 'tied' helper must match it in both the
    Frobenius and the spectral error norm.
    """
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    # Draw responsibilities and rescale each row to sum to one.
    resp = rng.rand(n_samples, n_components)
    row_totals = resp.sum(axis=1).reshape(-1, 1)
    resp = resp / row_totals
    X = rng.rand(n_samples, n_features)
    nk = resp.sum(axis=0)
    weighted_sums = np.dot(resp.T, X)
    xk = weighted_sums / nk.reshape(-1, 1)

    chol_full = _estimate_gaussian_precisions_cholesky_full(
        resp, X, nk, xk, 0)
    covs_from_full = [
        linalg.inv(np.dot(chol, chol.T)) for chol in chol_full
    ]
    # Left-hand side of the identity: sum over components of Nk * Sk, / N.
    weighted_covs = nk.reshape(-1, 1, 1) * covs_from_full
    expected_tied = np.sum(weighted_covs, 0) / n_samples

    chol_tied = _estimate_gaussian_precisions_cholesky_tied(
        resp, X, nk, xk, 0)
    actual_tied = linalg.inv(np.dot(chol_tied, chol_tied.T))

    checker = EmpiricalCovariance()
    checker.covariance_ = expected_tied
    for norm in ('frobenius', 'spectral'):
        assert_almost_equal(checker.error_norm(actual_tied, norm=norm), 0)