Example #1
    def test_integration_quic_graph_lasso_fun(self, params_in, expected):
        '''
        Just tests inputs/outputs (not validity of result).
        '''
        X = datasets.load_diabetes().data
        lam = 0.5
        if 'lam' in params_in:
            lam = params_in['lam']
            del params_in['lam']

        S = np.corrcoef(X, rowvar=False)
        if 'init_method' in params_in:
            if params_in['init_method'] == 'cov':
                S = np.cov(X, rowvar=False)

            del params_in['init_method']

        precision_, covariance_, opt_, cpu_time_, iters_, duality_gap_ =\
                quic(S, lam, **params_in)

        result_vec = [
            np.linalg.norm(covariance_),
            np.linalg.norm(precision_),
            np.linalg.norm(opt_),
            np.linalg.norm(duality_gap_),
        ]
        print(result_vec)
        assert_allclose(expected, result_vec, rtol=1e-1)
Example #2
    def test_integration_quic_graphical_lasso_fun(self, params_in, expected):
        """
        Just tests inputs/outputs (not validity of result).
        """
        X = datasets.load_diabetes().data
        lam = 0.5
        if "lam" in params_in:
            lam = params_in["lam"]
            del params_in["lam"]

        S = np.corrcoef(X, rowvar=False)
        if "init_method" in params_in:
            if params_in["init_method"] == "cov":
                S = np.cov(X, rowvar=False)

            del params_in["init_method"]

        precision_, covariance_, opt_, cpu_time_, iters_, duality_gap_ = quic(
            S, lam, **params_in)

        result_vec = [
            np.linalg.norm(covariance_),
            np.linalg.norm(precision_),
            np.linalg.norm(opt_),
            np.linalg.norm(duality_gap_),
        ]
        print(result_vec)
        assert_allclose(expected, result_vec, atol=1e-1, rtol=1e-1)
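The two tests above only check quic's input/output contract, not the validity of the estimate. A minimal standalone sketch of the same call pattern, assuming skggm's inverse_covariance.quic and scikit-learn are installed:

import numpy as np
from sklearn import datasets
from inverse_covariance import quic

X = datasets.load_diabetes().data
S = np.corrcoef(X, rowvar=False)  # sample correlation matrix
# quic returns a 6-tuple, in the same order as unpacked by the tests above
precision_, covariance_, opt_, cpu_time_, iters_, duality_gap_ = quic(S, lam=0.5)
print(np.linalg.norm(precision_), np.linalg.norm(covariance_))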
Example #3
    def _fit(self, pairs, y):
        if self.use_cov != 'deprecated':
            warnings.warn(
                '"use_cov" parameter is not used.'
                ' It has been deprecated in version 0.5.0 and will be'
                'removed in 0.6.0. Use "prior" instead.', DeprecationWarning)
        if not HAS_SKGGM:
            if self.verbose:
                print("SDML will use scikit-learn's graphical lasso solver.")
        else:
            if self.verbose:
                print("SDML will use skggm's graphical lasso solver.")
        pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples')

        # set up (the inverse of) the prior M
        # if the prior is the default (None), we raise a warning
        if self.prior is None:
            # TODO:
            #  replace prior=None by prior='identity' in v0.6.0 and remove the
            #  warning
            msg = (
                "Warning, no prior was set (`prior=None`). As of version 0.5.0, "
                "the default prior will now be set to "
                "'identity', instead of 'covariance'. If you still want to use "
                "the inverse of the covariance matrix as a prior, "
                "set prior='covariance'. This warning will disappear in "
                "v0.6.0, and `prior` parameter's default value will be set to "
                "'identity'.")
            warnings.warn(msg, ChangedBehaviorWarning)
            prior = 'identity'
        else:
            prior = self.prior
        _, prior_inv = _initialize_metric_mahalanobis(
            pairs,
            prior,
            return_inverse=True,
            strict_pd=True,
            matrix_name='prior',
            random_state=self.random_state)
        diff = pairs[:, 0] - pairs[:, 1]
        loss_matrix = (diff.T * y).dot(diff)
        emp_cov = prior_inv + self.balance_param * loss_matrix

        # our initialization will be the matrix with emp_cov's eigenvalues,
        # with a constant added so that they are all positive (plus an epsilon
        # to ensure definiteness). This is empirical.
        w, V = np.linalg.eigh(emp_cov)
        min_eigval = np.min(w)
        if min_eigval < 0.:
            warnings.warn(
                "Warning, the input matrix of graphical lasso is not "
                "positive semi-definite (PSD). The algorithm may diverge, "
                "and lead to degenerate solutions. "
                "To prevent that, try to decrease the balance parameter "
                "`balance_param` and/or to set prior='identity'.",
                ConvergenceWarning)
            w -= min_eigval  # we translate the eigenvalues to make them all positive
        w += 1e-10  # we add a small offset to avoid definiteness problems
        sigma0 = (V * w).dot(V.T)
        try:
            if HAS_SKGGM:
                theta0 = pinvh(sigma0)
                M, _, _, _, _, _ = quic(emp_cov,
                                        lam=self.sparsity_param,
                                        msg=self.verbose,
                                        Theta0=theta0,
                                        Sigma0=sigma0)
            else:
                _, M = graphical_lasso(emp_cov,
                                       alpha=self.sparsity_param,
                                       verbose=self.verbose,
                                       cov_init=sigma0)
            raised_error = None
            w_mahalanobis, _ = np.linalg.eigh(M)
            not_spd = any(w_mahalanobis < 0.)
            not_finite = not np.isfinite(M).all()
        except Exception as e:
            raised_error = e
            not_spd = False  # not_spd not applicable here so we set to False
            not_finite = False  # not_finite not applicable here so we set to False
        if raised_error is not None or not_spd or not_finite:
            msg = ("There was a problem in SDML when using {}'s graphical "
                   "lasso solver."
                   ).format("skggm" if HAS_SKGGM else "scikit-learn")
            if not HAS_SKGGM:
                skggm_advice = (
                    " skggm's graphical lasso can sometimes converge "
                    "on non SPD cases where scikit-learn's graphical "
                    "lasso fails to converge. Try to install skggm and "
                    "rerun the algorithm (see the README.md for the "
                    "right version of skggm).")
                msg += skggm_advice
            if raised_error is not None:
                msg += " The following error message was thrown: {}.".format(
                    raised_error)
            raise RuntimeError(msg)

        self.components_ = components_from_metric(np.atleast_2d(M))
        return self
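The sigma0 construction above (shift the eigenvalues of emp_cov so they are all positive, then add a small epsilon) can be read as a standalone helper. A minimal NumPy sketch; the name make_psd_init is illustrative and not part of metric-learn:

import numpy as np

def make_psd_init(emp_cov, eps=1e-10):
    # Mirror of the sigma0 construction in _fit: make the matrix strictly
    # positive definite by shifting its eigenvalues.
    w, V = np.linalg.eigh(emp_cov)
    min_eigval = np.min(w)
    if min_eigval < 0.:
        w = w - min_eigval  # translate eigenvalues so they are non-negative
    w = w + eps             # small offset to ensure strict definiteness
    return (V * w).dot(V.T)  # reassemble V diag(w) V^T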
Example #4
def latent_variable_glasso_data(X_o, X_h=None, alpha=0.1, mask=None, S_init=None,
                                max_iter_out=100, Theta_h=None, verbose=False,
                                threshold=1e-1, return_hists=False):
    '''
    An EM-algorithm implementation of the Latent Variable Gaussian Graphical Model.

    See: Venkat Chandrasekaran, Pablo A. Parrilo, and Alan S. Willsky.
    "Latent variable graphical model selection via convex optimization."
    The Annals of Statistics, 40(4):1935-1967, 2012.

    Loop for t = 1, 2, ...:

    1. M-step:
       solve a sparse inverse covariance estimation with graphical lasso, using
       the expected empirical covariance over the (observed, latent) data.

    2. E-step:
       given the estimated sparse inverse covariance over (o, h), compute the
       expected covariance over (o, h) given the observed covariance S:

           [[ S,               -S * Sigma_{oh}                       ],
            [ -Sigma_{ho} * S,  eye(h) + Sigma_{ho} * S * Sigma_{oh} ]]
    '''
    n, m = X_o.shape
    X_o -= np.mean(X_o, axis=0)
    X_o /= X_o.std(axis=0)

    S = np.cov(X_o)

    if X_h is None:
        if mask is not None:
            raise ValueError("Please decide the initial latent variables. ")
        sigma_hidden = 1
        h_dim = int(np.ceil(float(n) / 2.0))  #size of hidden variables
        X_h = sigma_hidden * np.random.randn(h_dim, m)
    else:
        h_dim = X_h.shape[0]

    n_all = n + h_dim

    if alpha == 0:
        # no regularization: return the empirical covariance and its pseudo-inverse
        precision = np.linalg.pinv(S)
        if return_hists:
            # log_likelihood refers to sklearn.covariance.log_likelihood
            cost = -2. * log_likelihood(S, precision)
            cost += n * np.log(2 * np.pi)
            d_gap = np.sum(S * precision) - n
            return S, precision, (cost, d_gap)
        return S, precision

    costs = list()
    if S_init is None:
        covariance_o = S.copy()
    else:
        covariance_o = S_init.copy()
    mle_estimate_o = S.copy()

    # stack rows
    X_all = np.concatenate((X_o, X_h), axis=0)
    # compute the covariance of the new (o,h) data
    covariance_all = np.cov(X_all)
    covariance_all[np.ix_(np.arange(n), np.arange(n))] = covariance_o

    # As a trivial regularization (Tikhonov-like), we scale down the
    # off-diagonal coefficients of our starting point: this is needed, as
    # in cross-validation the cov_init can easily be ill-conditioned and
    # the CV loop blows up. Besides, this takes a conservative standpoint
    # on the initial conditions and tends to make convergence faster.
    diagonal_all = covariance_all.flat[::n_all + 1].copy()
    covariance_all *= 0.95
    covariance_all.flat[::n_all + 1] = diagonal_all  # restore the diagonal

    subblock1_index = np.arange(n)
    subblock2_index = n + np.arange(h_dim)

    precision_all = np.linalg.pinv(covariance_all)

    cov_all_list = list()
    cov_all_list.append(covariance_all.copy())  # copy: covariance_all is updated in place below
    prec_all_list = list()
    prec_all_list.append(precision_all)

    dsol_list = list()
    # compute a mask that are all ones in subblock1
    if mask is None:
        mask = np.zeros((n_all, n_all))
        mask[np.ix_(subblock1_index, subblock1_index)] = np.ones((n, n))
    else:
        if mask.shape[0] != n_all:
            raise ValueError("mask must be of size (%d, %d)" % (n_all, n_all))
        if mask.shape[0] != mask.shape[1]:
            raise ValueError("mask must be square. shape now (%d, %d)." %
                             (mask.shape))
        if np.linalg.norm(mask - mask.T) > 1e-3:
            raise ValueError("mask must be symmetric.")

    if Theta_h is None:
        Theta_h = np.eye(h_dim)
        Theta_h_inv = np.eye(h_dim)
    else:
        eigval_h, eigvec_h = np.linalg.eigh(Theta_h)
        # clamp eigenvalues to a small positive value so the inverse stays finite
        eigval_h_transformed = np.maximum(eigval_h, 1e-10)
        Theta_h = np.dot(eigval_h_transformed * eigvec_h, eigvec_h.T)
        Theta_h_inv = np.dot((1. / eigval_h_transformed) * eigvec_h, eigvec_h.T)

    # EM-loop
    from tqdm import tqdm
    for t in tqdm(range(max_iter_out)):
        # M-step: find the inverse covariance matrix for entire graph
        # use a package in skggm to solve glaphical lasso with matrix regularizer
        precision_t, _, _, _, _, _ = quic(covariance_all, lam=alpha * mask)

        precision_all = precision_t
        prec_all_list.append(precision_all)
        precision_oh = precision_all[np.ix_(subblock1_index, subblock2_index)]
        # E-step: find the expectation of covariance over (o, h)
        covariance_oh = -np.dot(np.dot(covariance_o, precision_oh),
                                Theta_h_inv)
        covariance_hh = Theta_h_inv - np.dot(precision_oh.T, covariance_oh)

        covariance_all[np.ix_(subblock1_index, subblock1_index)] = covariance_o
        covariance_all[np.ix_(subblock1_index,
                              subblock2_index)] = covariance_oh
        covariance_all[np.ix_(subblock2_index,
                              subblock1_index)] = covariance_oh.T
        covariance_all[np.ix_(subblock2_index,
                              subblock2_index)] = covariance_hh

        cov_all_list.append(covariance_all.copy())  # store a snapshot, not a live reference
        if t == 0:
            precision_pre = precision_t
            if verbose: print("| d-sol | ")
        else:
            diff = np.linalg.norm(precision_pre - precision_t) / np.sqrt(n)
            dsol_list.append(diff)
            if verbose: print("| %.3f  |" % (diff))
            if diff < threshold:
                break
            else:
                precision_pre = precision_t

    if return_hists:
        return (covariance_all[np.ix_(subblock1_index, subblock1_index)],
                precision_all[np.ix_(subblock1_index, subblock1_index)],
                cov_all_list, prec_all_list, dsol_list)
    else:
        return (covariance_all[np.ix_(subblock1_index, subblock1_index)],
                precision_all[np.ix_(subblock1_index, subblock1_index)])
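A usage sketch for the routine above, assuming it is defined in the current module and skggm's quic is importable. Ten observed variables are generated as rows (np.cov(X_o) inside the function treats rows as variables); the data and settings here are illustrative only:

import numpy as np

rng = np.random.RandomState(0)
X_o = rng.randn(10, 200)  # 10 observed variables, 200 samples
cov_o, prec_o = latent_variable_glasso_data(X_o, alpha=0.1,
                                            max_iter_out=50, verbose=True)
print(prec_o.shape)  # (10, 10): estimates restricted to the observed block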
Example #5
  def _fit(self, pairs, y):
    if not HAS_SKGGM:
      if self.verbose:
        print("SDML will use scikit-learn's graphical lasso solver.")
    else:
      if self.verbose:
        print("SDML will use skggm's graphical lasso solver.")
    pairs, y = self._prepare_inputs(pairs, y,
                                    type_of_inputs='tuples')

    # set up (the inverse of) the prior M
    if self.use_cov:
      X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])})
      prior_inv = np.atleast_2d(np.cov(X, rowvar=False))
    else:
      prior_inv = np.identity(pairs.shape[2])
    diff = pairs[:, 0] - pairs[:, 1]
    loss_matrix = (diff.T * y).dot(diff)
    emp_cov = prior_inv + self.balance_param * loss_matrix

    # our initialization will be the matrix with emp_cov's eigenvalues,
    # with a constant added so that they are all positive (plus an epsilon
    # to ensure definiteness). This is empirical.
    w, V = np.linalg.eigh(emp_cov)
    min_eigval = np.min(w)
    if min_eigval < 0.:
      warnings.warn("Warning, the input matrix of graphical lasso is not "
                    "positive semi-definite (PSD). The algorithm may diverge, "
                    "and lead to degenerate solutions. "
                    "To prevent that, try to decrease the balance parameter "
                    "`balance_param` and/or to set use_cov=False.",
                    ConvergenceWarning)
      w -= min_eigval  # we translate the eigenvalues to make them all positive
    w += 1e-10  # we add a small offset to avoid definiteness problems
    sigma0 = (V * w).dot(V.T)
    try:
      if HAS_SKGGM:
        theta0 = pinvh(sigma0)
        M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param,
                                msg=self.verbose,
                                Theta0=theta0, Sigma0=sigma0)
      else:
        _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param,
                               verbose=self.verbose,
                               cov_init=sigma0)
      raised_error = None
      w_mahalanobis, _ = np.linalg.eigh(M)
      not_spd = any(w_mahalanobis < 0.)
      not_finite = not np.isfinite(M).all()
    except Exception as e:
      raised_error = e
      not_spd = False  # not_spd not applicable here so we set to False
      not_finite = False  # not_finite not applicable here so we set to False
    if raised_error is not None or not_spd or not_finite:
      msg = ("There was a problem in SDML when using {}'s graphical "
             "lasso solver.").format("skggm" if HAS_SKGGM else "scikit-learn")
      if not HAS_SKGGM:
        skggm_advice = (" skggm's graphical lasso can sometimes converge "
                        "on non SPD cases where scikit-learn's graphical "
                        "lasso fails to converge. Try to install skggm and "
                        "rerun the algorithm (see the README.md for the "
                        "right version of skggm).")
        msg += skggm_advice
      if raised_error is not None:
        msg += " The following error message was thrown: {}.".format(
            raised_error)
      raise RuntimeError(msg)

    self.transformer_ = transformer_from_metric(np.atleast_2d(M))
    return self
Example #6
import sys

sys.path.append("..")
sys.path.append("../inverse_covariance")

from sklearn.covariance import graph_lasso
from inverse_covariance import quic
import numpy as np

#############################################################################
# Example 1
# graph_lasso fails to converge at lam = .009 * np.max(np.abs(Shat))
X = np.loadtxt("data/Mazumder_example1.txt", delimiter=",")
Shat = np.cov(X, rowvar=0)
try:
    graph_lasso(Shat, alpha=.004)
except FloatingPointError as e:
    print("{0}".format(e))
vals = quic(Shat, .004)

#############################################################################
# Example 2
# graph_lasso fails to converge at lam = .009 * np.max(np.abs(Shat))
X = np.loadtxt("data/Mazumder_example2.txt", delimiter=",")
Shat = np.cov(X, rowvar=0)
try:
    graph_lasso(Shat, alpha=.02)
except FloatingPointError as e:
    print("{0}".format(e))
vals = quic(Shat, .02)
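As in Examples #1 and #2, the value returned by quic is a 6-tuple; a short sketch of unpacking it:

precision_, covariance_, opt_, cpu_time_, iters_, duality_gap_ = vals
print("nonzeros in the estimated precision:", np.count_nonzero(precision_))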
Example #7
    def _fit(self, pairs, y):
        if not HAS_SKGGM:
            if self.verbose:
                print("SDML will use scikit-learn's graphical lasso solver.")
        else:
            if self.verbose:
                print("SDML will use skggm's graphical lasso solver.")
        pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples')

        # set up (the inverse of) the prior M
        if self.use_cov:
            X = np.vstack(
                {tuple(row)
                 for row in pairs.reshape(-1, pairs.shape[2])})
            prior_inv = np.atleast_2d(np.cov(X, rowvar=False))
        else:
            prior_inv = np.identity(pairs.shape[2])
        diff = pairs[:, 0] - pairs[:, 1]
        loss_matrix = (diff.T * y).dot(diff)
        emp_cov = prior_inv + self.balance_param * loss_matrix

        # our initialization will be the matrix with emp_cov's eigenvalues,
        # with a constant added so that they are all positive (plus an epsilon
        # to ensure definiteness). This is empirical.
        w, V = np.linalg.eigh(emp_cov)
        min_eigval = np.min(w)
        if min_eigval < 0.:
            warnings.warn(
                "Warning, the input matrix of graphical lasso is not "
                "positive semi-definite (PSD). The algorithm may diverge, "
                "and lead to degenerate solutions. "
                "To prevent that, try to decrease the balance parameter "
                "`balance_param` and/or to set use_cov=False.",
                ConvergenceWarning)
            w -= min_eigval  # we translate the eigenvalues to make them all positive
        w += 1e-10  # we add a small offset to avoid definiteness problems
        sigma0 = (V * w).dot(V.T)
        try:
            if HAS_SKGGM:
                theta0 = pinvh(sigma0)
                M, _, _, _, _, _ = quic(emp_cov,
                                        lam=self.sparsity_param,
                                        msg=self.verbose,
                                        Theta0=theta0,
                                        Sigma0=sigma0)
            else:
                _, M = graphical_lasso(emp_cov,
                                       alpha=self.sparsity_param,
                                       verbose=self.verbose,
                                       cov_init=sigma0)
            raised_error = None
            w_mahalanobis, _ = np.linalg.eigh(M)
            not_spd = any(w_mahalanobis < 0.)
            not_finite = not np.isfinite(M).all()
        except Exception as e:
            raised_error = e
            not_spd = False  # not_spd not applicable here so we set to False
            not_finite = False  # not_finite not applicable here so we set to False
        if raised_error is not None or not_spd or not_finite:
            msg = ("There was a problem in SDML when using {}'s graphical "
                   "lasso solver."
                   ).format("skggm" if HAS_SKGGM else "scikit-learn")
            if not HAS_SKGGM:
                skggm_advice = (
                    " skggm's graphical lasso can sometimes converge "
                    "on non SPD cases where scikit-learn's graphical "
                    "lasso fails to converge. Try to install skggm and "
                    "rerun the algorithm (see the README.md for the "
                    "right version of skggm).")
                msg += skggm_advice
            if raised_error is not None:
                msg += " The following error message was thrown: {}.".format(
                    raised_error)
            raise RuntimeError(msg)

        self.transformer_ = transformer_from_metric(np.atleast_2d(M))
        return self
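All three _fit variants above branch on a HAS_SKGGM flag. One plausible way such a flag could be set (metric-learn's actual detection logic may differ) is a simple import probe:

try:
    from inverse_covariance import quic
    HAS_SKGGM = True
except ImportError:
    HAS_SKGGM = False

from sklearn.covariance import graphical_lasso  # fallback solver when skggm is absent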