def test_suffstat_sk_tied(): # use equation Nk * Sk / N = S_tied rng = np.random.RandomState(0) n_samples, n_features, n_components = 500, 2, 2 resp = rng.rand(n_samples, n_components) resp = resp / resp.sum(axis=1)[:, np.newaxis] X = rng.rand(n_samples, n_features) nk = resp.sum(axis=0) xk = np.dot(resp.T, X) / nk[:, np.newaxis] covars_pred_full = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0) covars_pred_full = np.sum(nk[:, np.newaxis, np.newaxis] * covars_pred_full, 0) / n_samples covars_pred_tied = _estimate_gaussian_covariances_tied(resp, X, nk, xk, 0) ecov = EmpiricalCovariance() ecov.covariance_ = covars_pred_full assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='frobenius'), 0) assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='spectral'), 0) # check the precision computation precs_chol_pred = _compute_precision_cholesky(covars_pred_tied, 'tied') precs_pred = np.dot(precs_chol_pred, precs_chol_pred.T) precs_est = linalg.inv(covars_pred_tied) assert_array_almost_equal(precs_est, precs_pred)
def test_suffstat_sk_full(): # compare the precision matrix compute from the # EmpiricalCovariance.covariance fitted on X*sqrt(resp) # with _sufficient_sk_full, n_components=1 rng = np.random.RandomState(0) n_samples, n_features = 500, 2 # special case 1, assuming data is "centered" X = rng.rand(n_samples, n_features) resp = rng.rand(n_samples, 1) X_resp = np.sqrt(resp) * X nk = np.array([n_samples]) xk = np.zeros((1, n_features)) covars_pred = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0) ecov = EmpiricalCovariance(assume_centered=True) ecov.fit(X_resp) assert_almost_equal(ecov.error_norm(covars_pred[0], norm='frobenius'), 0) assert_almost_equal(ecov.error_norm(covars_pred[0], norm='spectral'), 0) # check the precision computation precs_chol_pred = _compute_precision_cholesky(covars_pred, 'full') precs_pred = np.array([np.dot(prec, prec.T) for prec in precs_chol_pred]) precs_est = np.array([linalg.inv(cov) for cov in covars_pred]) assert_array_almost_equal(precs_est, precs_pred) # special case 2, assuming resp are all ones resp = np.ones((n_samples, 1)) nk = np.array([n_samples]) xk = X.mean(axis=0).reshape((1, -1)) covars_pred = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0) ecov = EmpiricalCovariance(assume_centered=False) ecov.fit(X) assert_almost_equal(ecov.error_norm(covars_pred[0], norm='frobenius'), 0) assert_almost_equal(ecov.error_norm(covars_pred[0], norm='spectral'), 0) # check the precision computation precs_chol_pred = _compute_precision_cholesky(covars_pred, 'full') precs_pred = np.array([np.dot(prec, prec.T) for prec in precs_chol_pred]) precs_est = np.array([linalg.inv(cov) for cov in covars_pred]) assert_array_almost_equal(precs_est, precs_pred)
def test_suffstat_sk_diag(): # test against 'full' case rng = np.random.RandomState(0) n_samples, n_features, n_components = 500, 2, 2 resp = rng.rand(n_samples, n_components) resp = resp / resp.sum(axis=1)[:, np.newaxis] X = rng.rand(n_samples, n_features) nk = resp.sum(axis=0) xk = np.dot(resp.T, X) / nk[:, np.newaxis] covars_pred_full = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0) covars_pred_diag = _estimate_gaussian_covariances_diag(resp, X, nk, xk, 0) ecov = EmpiricalCovariance() for (cov_full, cov_diag) in zip(covars_pred_full, covars_pred_diag): ecov.covariance_ = np.diag(np.diag(cov_full)) cov_diag = np.diag(cov_diag) assert_almost_equal(ecov.error_norm(cov_diag, norm='frobenius'), 0) assert_almost_equal(ecov.error_norm(cov_diag, norm='spectral'), 0) # check the precision computation precs_chol_pred = _compute_precision_cholesky(covars_pred_diag, 'diag') assert_almost_equal(covars_pred_diag, 1. / precs_chol_pred**2)