def test_integration_quic_graph_lasso_fun(self, params_in, expected):
    '''Just tests inputs/outputs (not validity of result).'''
    X = datasets.load_diabetes().data
    lam = 0.5
    if 'lam' in params_in:
        lam = params_in['lam']
        del params_in['lam']

    S = np.corrcoef(X, rowvar=False)
    if 'init_method' in params_in:
        if params_in['init_method'] == 'cov':
            S = np.cov(X, rowvar=False)
        del params_in['init_method']

    precision_, covariance_, opt_, cpu_time_, iters_, duality_gap_ = \
        quic(S, lam, **params_in)
    result_vec = [
        np.linalg.norm(covariance_),
        np.linalg.norm(precision_),
        np.linalg.norm(opt_),
        np.linalg.norm(duality_gap_),
    ]
    print(result_vec)
    assert_allclose(expected, result_vec, rtol=1e-1)
def test_integration_quic_graphical_lasso_fun(self, params_in, expected):
    """Just tests inputs/outputs (not validity of result)."""
    X = datasets.load_diabetes().data
    lam = 0.5
    if "lam" in params_in:
        lam = params_in["lam"]
        del params_in["lam"]

    S = np.corrcoef(X, rowvar=False)
    if "init_method" in params_in:
        if params_in["init_method"] == "cov":
            S = np.cov(X, rowvar=False)
        del params_in["init_method"]

    precision_, covariance_, opt_, cpu_time_, iters_, duality_gap_ = quic(
        S, lam, **params_in)
    result_vec = [
        np.linalg.norm(covariance_),
        np.linalg.norm(precision_),
        np.linalg.norm(opt_),
        np.linalg.norm(duality_gap_),
    ]
    print(result_vec)
    assert_allclose(expected, result_vec, atol=1e-1, rtol=1e-1)
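For reference, a minimal standalone call mirroring what both tests exercise. This is a sketch assuming skggm is installed and `quic` is importable from `inverse_covariance`; the six-name unpacking matches the return ordering used in the tests above.

import numpy as np
from sklearn import datasets
from inverse_covariance import quic  # skggm's QUIC solver, as used above

# sample correlation matrix of the diabetes data, as in the tests
X = datasets.load_diabetes().data
S = np.corrcoef(X, rowvar=False)

# quic returns (precision, covariance, objective, cpu_time, iters, gap)
precision_, covariance_, opt_, cpu_time_, iters_, duality_gap_ = quic(S, 0.5)
print(np.linalg.norm(precision_), iters_)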
def _fit(self, pairs, y):
    if self.use_cov != 'deprecated':
        warnings.warn('"use_cov" parameter is not used.'
                      ' It has been deprecated in version 0.5.0 and will be'
                      ' removed in 0.6.0. Use "prior" instead.',
                      DeprecationWarning)
    if not HAS_SKGGM:
        if self.verbose:
            print("SDML will use scikit-learn's graphical lasso solver.")
    else:
        if self.verbose:
            print("SDML will use skggm's graphical lasso solver.")
    pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples')

    # set up (the inverse of) the prior M
    # if the prior is the default (None), we raise a warning
    if self.prior is None:
        # TODO: replace prior=None by prior='identity' in v0.6.0 and
        # remove the warning
        msg = ("Warning, no prior was set (`prior=None`). As of version "
               "0.5.0, the default prior will now be set to 'identity', "
               "instead of 'covariance'. If you still want to use the "
               "inverse of the covariance matrix as a prior, set "
               "prior='covariance'. This warning will disappear in v0.6.0, "
               "and `prior` parameter's default value will be set to "
               "'identity'.")
        warnings.warn(msg, ChangedBehaviorWarning)
        prior = 'identity'
    else:
        prior = self.prior
    _, prior_inv = _initialize_metric_mahalanobis(
        pairs, prior, return_inverse=True, strict_pd=True,
        matrix_name='prior', random_state=self.random_state)
    diff = pairs[:, 0] - pairs[:, 1]
    loss_matrix = (diff.T * y).dot(diff)
    emp_cov = prior_inv + self.balance_param * loss_matrix

    # our initialization will be the matrix with emp_cov's eigenvalues,
    # with a constant added so that they are all positive (plus an epsilon
    # to ensure definiteness). This is empirical.
    w, V = np.linalg.eigh(emp_cov)
    min_eigval = np.min(w)
    if min_eigval < 0.:
        warnings.warn("Warning, the input matrix of graphical lasso is not "
                      "positive semi-definite (PSD). The algorithm may "
                      "diverge, and lead to degenerate solutions. "
                      "To prevent that, try to decrease the balance "
                      "parameter `balance_param` and/or to set "
                      "prior='identity'.", ConvergenceWarning)
    w -= min_eigval  # we translate the eigenvalues to make them all positive
    w += 1e-10  # we add a small offset to avoid definiteness problems
    sigma0 = (V * w).dot(V.T)
    try:
        if HAS_SKGGM:
            theta0 = pinvh(sigma0)
            M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param,
                                    msg=self.verbose,
                                    Theta0=theta0, Sigma0=sigma0)
        else:
            _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param,
                                   verbose=self.verbose,
                                   cov_init=sigma0)
        raised_error = None
        w_mahalanobis, _ = np.linalg.eigh(M)
        not_spd = any(w_mahalanobis < 0.)
        not_finite = not np.isfinite(M).all()
    except Exception as e:
        raised_error = e
        not_spd = False  # not_spd not applicable here so we set to False
        not_finite = False  # not_finite not applicable here so we set to False
    if raised_error is not None or not_spd or not_finite:
        msg = ("There was a problem in SDML when using {}'s graphical "
               "lasso solver.").format("skggm" if HAS_SKGGM
                                       else "scikit-learn")
        if not HAS_SKGGM:
            skggm_advice = (" skggm's graphical lasso can sometimes converge "
                            "on non SPD cases where scikit-learn's graphical "
                            "lasso fails to converge. Try to install skggm "
                            "and rerun the algorithm (see the README.md for "
                            "the right version of skggm).")
            msg += skggm_advice
        if raised_error is not None:
            msg += " The following error message was thrown: {}.".format(
                raised_error)
        raise RuntimeError(msg)
    self.components_ = components_from_metric(np.atleast_2d(M))
    return self
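The sigma0 construction above (keep emp_cov's eigenvectors, shift all eigenvalues until they are strictly positive) can be read in isolation. A minimal sketch; the helper name make_spd_initialization is hypothetical, not part of metric-learn:

import numpy as np

def make_spd_initialization(emp_cov, eps=1e-10):
    # hypothetical helper mirroring the sigma0 construction in _fit:
    # translate the eigenvalues so the smallest becomes eps > 0, which
    # guarantees a symmetric positive-definite matrix
    w, V = np.linalg.eigh(emp_cov)
    w = w - np.min(w) + eps
    return (V * w).dot(V.T)  # equals V @ np.diag(w) @ V.T

A = np.array([[1., 2.], [2., 1.]])  # indefinite: eigenvalues -1 and 3
sigma0 = make_spd_initialization(A)
print(np.linalg.eigvalsh(sigma0))   # all strictly positive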
def latent_variable_glasso_data(X_o, X_h=None, alpha=0.1, mask=None,
                                S_init=None, max_iter_out=100, Theta_h=None,
                                verbose=False, threshold=1e-1,
                                return_costs=False, return_hists=False):
    r'''An EM-algorithm implementation of the Latent Variable Gaussian
    Graphical Model; see the review in "Venkat Chandrasekaran, Pablo A.
    Parrilo, and Alan S. Willsky. Latent variable graphical model selection
    via convex optimization. The Annals of Statistics, 40(4):1935–1967,
    2012."

    Loop for t = 1, 2, ...:

    1. M-step: solve a sparse inverse covariance estimation using gLasso,
       with the expectation of the empirical covariance over the
       (observed, latent) data.
    2. E-step: given the estimated sparse inverse covariance
       \Sigma_{(o,h)}, find the expectation of the covariance over (o, h)
       given the observed covariance data:

           S_all = [[S,              -S * Sigma_oh                    ],
                    [-Sigma_ho * S,  eye(h) + Sigma_ho * S * Sigma_oh]]
    '''
    n, m = X_o.shape
    # standardize each observed variable; rows are variables and columns
    # are samples, matching the np.cov convention used below
    X_o -= np.mean(X_o, axis=1, keepdims=True)
    X_o /= X_o.std(axis=1, keepdims=True)
    S = np.cov(X_o)

    if X_h is None:
        if mask is not None:
            raise ValueError("Please provide the initial latent variables "
                             "X_h when a mask is given.")
        sigma_hidden = 1
        h_dim = int(np.ceil(float(n) / 2.0))  # size of hidden variables
        X_h = sigma_hidden * np.random.randn(h_dim, m)
    else:
        h_dim = X_h.shape[0]
    n_all = n + h_dim

    if alpha == 0:
        if return_costs:
            from sklearn.covariance import log_likelihood
            precision = np.linalg.pinv(S)
            cost = -2. * log_likelihood(S, precision)
            cost += n * np.log(2 * np.pi)
            d_gap = np.sum(S * precision) - n
            return S, precision, (cost, d_gap)
        else:
            return S, np.linalg.pinv(S)

    if S_init is None:
        covariance_o = S.copy()
    else:
        covariance_o = S_init.copy()
    mle_estimate_o = S.copy()

    # stack rows
    X_all = np.concatenate((X_o, X_h), axis=0)
    # compute the covariance of the new (o, h) data
    covariance_all = np.cov(X_all)
    covariance_all[np.ix_(np.arange(n), np.arange(n))] = covariance_o

    # As a trivial regularization (Tikhonov like), we scale down the
    # off-diagonal coefficients of our starting point: this is needed, as
    # in cross-validation the cov_init can easily be ill-conditioned, and
    # the CV loop blows up. Besides, this takes a conservative stand-point
    # on the initial conditions, and it tends to make convergence faster.
    diagonal_all = covariance_all.flat[::n_all + 1]  # save the diagonal
    covariance_all *= 0.95
    covariance_all.flat[::n_all + 1] = diagonal_all  # restore the diagonal

    subblock1_index = np.arange(n)
    subblock2_index = n + np.arange(h_dim)

    precision_all = np.linalg.pinv(covariance_all)
    cov_all_list = list()
    cov_all_list.append(covariance_all.copy())
    prec_all_list = list()
    prec_all_list.append(precision_all)
    dsol_list = list()

    # compute a mask that is all ones in subblock1
    if mask is None:
        mask = np.zeros((n_all, n_all))
        mask[np.ix_(subblock1_index, subblock1_index)] = np.ones((n, n))
    else:
        if mask.shape[0] != n_all:
            raise ValueError("mask must be of size (%d, %d)"
                             % (n_all, n_all))
        if mask.shape[0] != mask.shape[1]:
            raise ValueError("mask must be square. shape now (%d, %d)."
                             % mask.shape)
        if np.linalg.norm(mask - mask.T) > 1e-3:
            raise ValueError("mask must be symmetric.")

    if Theta_h is None:
        Theta_h = np.eye(h_dim)
        Theta_h_inv = np.eye(h_dim)
    else:
        eigval_h, eigvec_h = np.linalg.eigh(Theta_h)
        eigval_h_transformed = np.maximum(eigval_h, 0)
        Theta_h = np.dot(eigval_h_transformed * eigvec_h, eigvec_h.T)
        Theta_h_inv = np.dot((1 / eigval_h_transformed) * eigvec_h,
                             eigvec_h.T)

    # EM loop
    from tqdm import tqdm
    for t in tqdm(range(max_iter_out)):
        # M-step: find the inverse covariance matrix for the entire graph;
        # use skggm's quic to solve graphical lasso with a matrix regularizer
        precision_t, _, _, _, _, _ = quic(covariance_all, lam=alpha * mask)
        precision_all = precision_t
        prec_all_list.append(precision_all)
        precision_oh = precision_all[np.ix_(subblock1_index,
                                            subblock2_index)]

        # E-step: find the expectation of the covariance over (o, h)
        covariance_oh = -np.dot(np.dot(covariance_o, precision_oh),
                                Theta_h_inv)
        covariance_hh = Theta_h_inv - np.dot(precision_oh.T, covariance_oh)
        covariance_all[np.ix_(subblock1_index,
                              subblock1_index)] = covariance_o
        covariance_all[np.ix_(subblock1_index,
                              subblock2_index)] = covariance_oh
        covariance_all[np.ix_(subblock2_index,
                              subblock1_index)] = covariance_oh.T
        covariance_all[np.ix_(subblock2_index,
                              subblock2_index)] = covariance_hh
        # copy: covariance_all is updated in place on every iteration
        cov_all_list.append(covariance_all.copy())

        if t == 0:
            precision_pre = precision_t
            if verbose:
                print("| d-sol |")
        else:
            diff = np.linalg.norm(precision_pre - precision_t) / np.sqrt(n)
            dsol_list.append(diff)
            if verbose:
                print("| %.3f |" % diff)
            if diff < threshold:
                break
            else:
                precision_pre = precision_t

    if return_hists:
        return (covariance_all[np.ix_(subblock1_index, subblock1_index)],
                precision_all[np.ix_(subblock1_index, subblock1_index)],
                cov_all_list, prec_all_list, dsol_list)
    else:
        return (covariance_all[np.ix_(subblock1_index, subblock1_index)],
                precision_all[np.ix_(subblock1_index, subblock1_index)])
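A usage sketch for the function above, under its own conventions (rows are variables, columns are samples), with synthetic data and defaults for the latent block. This assumes numpy and skggm's quic are importable as in the function body.

import numpy as np

rng = np.random.RandomState(0)
X_o = rng.randn(20, 200)  # 20 observed variables, 200 samples

# run the EM loop; with X_h=None a random latent block of
# ceil(20 / 2) = 10 variables is generated internally
cov_oo, prec_oo = latent_variable_glasso_data(X_o, alpha=0.1,
                                              max_iter_out=50)
print(cov_oo.shape, prec_oo.shape)  # (20, 20) (20, 20)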
def _fit(self, pairs, y):
    if not HAS_SKGGM:
        if self.verbose:
            print("SDML will use scikit-learn's graphical lasso solver.")
    else:
        if self.verbose:
            print("SDML will use skggm's graphical lasso solver.")
    pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples')

    # set up (the inverse of) the prior M
    if self.use_cov:
        X = np.vstack({tuple(row) for row in
                       pairs.reshape(-1, pairs.shape[2])})
        prior_inv = np.atleast_2d(np.cov(X, rowvar=False))
    else:
        prior_inv = np.identity(pairs.shape[2])
    diff = pairs[:, 0] - pairs[:, 1]
    loss_matrix = (diff.T * y).dot(diff)
    emp_cov = prior_inv + self.balance_param * loss_matrix

    # our initialization will be the matrix with emp_cov's eigenvalues,
    # with a constant added so that they are all positive (plus an epsilon
    # to ensure definiteness). This is empirical.
    w, V = np.linalg.eigh(emp_cov)
    min_eigval = np.min(w)
    if min_eigval < 0.:
        warnings.warn("Warning, the input matrix of graphical lasso is not "
                      "positive semi-definite (PSD). The algorithm may "
                      "diverge, and lead to degenerate solutions. "
                      "To prevent that, try to decrease the balance "
                      "parameter `balance_param` and/or to set "
                      "use_cov=False.", ConvergenceWarning)
    w -= min_eigval  # we translate the eigenvalues to make them all positive
    w += 1e-10  # we add a small offset to avoid definiteness problems
    sigma0 = (V * w).dot(V.T)
    try:
        if HAS_SKGGM:
            theta0 = pinvh(sigma0)
            M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param,
                                    msg=self.verbose,
                                    Theta0=theta0, Sigma0=sigma0)
        else:
            _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param,
                                   verbose=self.verbose,
                                   cov_init=sigma0)
        raised_error = None
        w_mahalanobis, _ = np.linalg.eigh(M)
        not_spd = any(w_mahalanobis < 0.)
        not_finite = not np.isfinite(M).all()
    except Exception as e:
        raised_error = e
        not_spd = False  # not_spd not applicable here so we set to False
        not_finite = False  # not_finite not applicable here so we set to False
    if raised_error is not None or not_spd or not_finite:
        msg = ("There was a problem in SDML when using {}'s graphical "
               "lasso solver.").format("skggm" if HAS_SKGGM
                                       else "scikit-learn")
        if not HAS_SKGGM:
            skggm_advice = (" skggm's graphical lasso can sometimes converge "
                            "on non SPD cases where scikit-learn's graphical "
                            "lasso fails to converge. Try to install skggm "
                            "and rerun the algorithm (see the README.md for "
                            "the right version of skggm).")
            msg += skggm_advice
        if raised_error is not None:
            msg += " The following error message was thrown: {}.".format(
                raised_error)
        raise RuntimeError(msg)
    self.transformer_ = transformer_from_metric(np.atleast_2d(M))
    return self
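The loss-matrix line in both versions of _fit, loss_matrix = (diff.T * y).dot(diff), is a vectorized signed sum of outer products of pair differences. A small sketch with made-up pairs (shapes only; +1 marks similar pairs and -1 dissimilar ones, as in SDML):

import numpy as np

rng = np.random.RandomState(42)
n_pairs, d = 6, 4
pairs = rng.randn(n_pairs, 2, d)     # (n_pairs, 2, d), as in _fit
y = np.array([1, 1, 1, -1, -1, -1])  # pair labels

diff = pairs[:, 0] - pairs[:, 1]     # (n_pairs, d)
# each column of diff.T is scaled by its pair's label, so the product
# equals sum_i y_i * outer(diff_i, diff_i)
loss_matrix = (diff.T * y).dot(diff)  # (d, d), symmetric
assert np.allclose(
    loss_matrix,
    sum(yi * np.outer(di, di) for yi, di in zip(y, diff)))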
import sys
sys.path.append("..")
sys.path.append("../inverse_covariance")

import numpy as np
from sklearn.covariance import graph_lasso

from inverse_covariance import quic

#############################################################################
# Example 1
# graph_lasso fails to converge at lam = .009 * np.max(np.abs(Shat))
X = np.loadtxt("data/Mazumder_example1.txt", delimiter=",")
Shat = np.cov(X, rowvar=0)
try:
    graph_lasso(Shat, alpha=.004)
except FloatingPointError as e:
    print("{0}".format(e))

vals = quic(Shat, .004)

#############################################################################
# Example 2
# graph_lasso fails to converge at lam = .009 * np.max(np.abs(Shat))
X = np.loadtxt("data/Mazumder_example2.txt", delimiter=",")
Shat = np.cov(X, rowvar=0)
try:
    graph_lasso(Shat, alpha=.02)
except FloatingPointError as e:
    print("{0}".format(e))

vals = quic(Shat, .02)
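quic returns the same six-tuple here as in the tests above, so vals can be unpacked directly; a short follow-on sketch:

# unpack quic's six-tuple (same ordering as in the tests above)
precision_, covariance_, opt_, cpu_time_, iters_, duality_gap_ = vals
print("iterations:", iters_)
print("precision norm:", np.linalg.norm(precision_))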