def _nonrobust_covariance(self, data, assume_centered=False):
    """Non-robust estimation of the covariance to be used within MCD.

    The empirical covariance of ``data`` is regularized with ``graph_lasso``
    using ``self.shrinkage``. If that call fails, 1e-06 is added to the
    diagonal of the empirical covariance and ``graph_lasso`` is run again;
    a failure of the second attempt propagates to the caller.

    Parameters
    ----------
    data: array_like, shape (n_samples, n_features)
      Data for which to compute the non-robust covariance matrix.
    assume_centered: Boolean
      Whether or not the observations should be considered as centered.

    Returns
    -------
    nonrobust_covariance: array_like, shape (n_features, n_features)
      The non-robust covariance of the data.

    """
    emp_cov = empirical_covariance(data, assume_centered=assume_centered)
    try:
        cov, _ = graph_lasso(emp_cov, self.shrinkage)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed by the retry.
        print(" > Exception!")
        # Every (n_features + 1)-th flat entry is a diagonal entry: add a
        # small ridge before trying again.
        emp_cov.flat[::data.shape[1] + 1] += 1e-06
        cov, _ = graph_lasso(emp_cov, self.shrinkage)
    return cov
def _nonrobust_covariance(self, data, assume_centered=False):
    """Non-robust estimation of the covariance to be used within MCD.

    ``graph_lasso`` is applied to the empirical covariance of ``data`` with
    ``self.shrinkage`` as its second argument. When that raises, the
    diagonal of the empirical covariance is increased by 1e-06 and the
    estimation is attempted once more (errors from the retry are not
    caught).

    Parameters
    ----------
    data: array_like, shape (n_samples, n_features)
      Data for which to compute the non-robust covariance matrix.
    assume_centered: Boolean
      Whether or not the observations should be considered as centered.

    Returns
    -------
    nonrobust_covariance: array_like, shape (n_features, n_features)
      The non-robust covariance of the data.

    """
    emp_cov = empirical_covariance(data, assume_centered=assume_centered)
    try:
        cov, _ = graph_lasso(emp_cov, self.shrinkage)
    except Exception:
        # Narrowed from a bare ``except`` so interpreter-exit signals
        # (KeyboardInterrupt, SystemExit) still propagate.
        print(" > Exception!")
        # Stride n_features + 1 over the flattened matrix walks the diagonal.
        n_features = data.shape[1]
        emp_cov.flat[::n_features + 1] += 1e-06
        cov, _ = graph_lasso(emp_cov, self.shrinkage)
    return cov
def test_graph_lasso(random_state=0):
    """Check that graph_lasso costs decrease and smoke test GraphLasso."""
    # Draw observations from a Gaussian with a sparse precision matrix
    n_features = 20
    n_obs = 100
    rng = check_random_state(random_state)
    true_prec = make_sparse_spd_matrix(n_features, alpha=.95,
                                       random_state=rng)
    true_cov = linalg.inv(true_prec)
    X = rng.multivariate_normal(np.zeros(n_features), true_cov, size=n_obs)
    emp_cov = empirical_covariance(X)

    # NOTE(review): neither ``alpha`` nor ``method`` is forwarded to
    # graph_lasso below, so every iteration runs the same call with
    # alpha=.1 and the default mode -- confirm whether that is intended.
    for alpha in (.1, .01):
        covs = {}
        for method in ('cd', 'lars'):
            estimate, _, costs = graph_lasso(emp_cov, alpha=.1,
                                             return_costs=True)
            covs[method] = estimate
            costs, dual_gap = np.array(costs).T
            # The recorded costs must be strictly decreasing
            assert_array_less(np.diff(costs), 0)
        # Both entries are expected to agree
        assert_array_almost_equal(covs['cd'], covs['lars'])

    # Smoke test of the estimator class
    fitted = GraphLasso(alpha=.1).fit(X)
    assert_array_almost_equal(fitted.covariance_, covs['cd'])
def _glasso(X, lamb, return_precision=False, **kwargs):
    """Learn the structure of Markov random field with the graphical lasso.

    The covariance of the standardized observations (``scale(X)``) is passed
    to ``graph_lasso`` with ``alpha=lamb``.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Observations of variables.

    lamb : float
        Regularization parameter.

    return_precision : bool, default False
        If True, the estimated precision matrix will be returned instead of
        adjacency matrix.

    **kwargs
        Extra keyword arguments. Only those that name a parameter of
        ``graph_lasso`` are forwarded; the rest are silently dropped.

    Returns
    ----------
    adj : array, shape (n_features, n_features)
        Estimated adjacency matrix (or precision matrix if
        ``return_precision`` is True) of an MRF.
    """
    import inspect

    cov = np.cov(scale(X), rowvar=False)
    # Filter on the real signature. ``__code__.co_varnames`` also lists the
    # function's local variables, and for a decorated function it describes
    # the wrapper rather than the wrapped callable.
    accepted = inspect.signature(graph_lasso).parameters
    kwargs = {k: v for k, v in kwargs.items() if k in accepted}
    pre = graph_lasso(cov, alpha=lamb, **kwargs)[1]
    if return_precision:
        return pre
    # An edge exists wherever the precision entry is not (numerically) zero;
    # self-loops on the diagonal are removed.
    adj = ~np.isclose(pre, 0)
    adj[np.eye(len(adj), dtype=bool)] = 0
    return adj
def test_graph_lasso_iris_singular():
    """Compare graph_lasso on three iris rows with hard-coded R values."""
    # Only three rows are kept to exercise the rank-deficient case; they
    # must be chosen so that no feature has zero variance.
    rows = np.arange(10, 13)

    # Reference solution from the R glasso package for alpha=0.01
    expected_cov = np.array([
        [0.08, 0.056666662595, 0.00229729713223, 0.00153153142149],
        [0.056666662595, 0.082222222222, 0.00333333333333, 0.00222222222222],
        [0.002297297132, 0.003333333333, 0.00666666666667, 0.00009009009009],
        [0.001531531421, 0.002222222222, 0.00009009009009, 0.00222222222222],
    ])
    expected_icov = np.array([
        [24.42244057, -16.831679593, 0.0, 0.0],
        [-16.83168201, 24.351841681, -6.206896552, -12.5],
        [0.0, -6.206896171, 153.103448276, 0.0],
        [0.0, -12.499999143, 0.0, 462.5],
    ])

    iris_subset = datasets.load_iris().data[rows, :]
    emp_cov = empirical_covariance(iris_subset)
    for solver in ('cd', 'lars'):
        est_cov, est_icov = graph_lasso(emp_cov, alpha=0.01,
                                        return_costs=False, mode=solver)
        assert_array_almost_equal(est_cov, expected_cov, decimal=5)
        assert_array_almost_equal(est_icov, expected_icov, decimal=5)
def glassoBonaFidePartial(gl, X, TrueCov):
    """Refit the graphical lasso at the alpha that scores best on TrueCov.

    A precision matrix is computed for every value in ``gl.cv_alphas_`` with
    ``graph_lasso_path``; each is scored with
    ``log_likelihood(TrueCov, precision)`` and the index of the highest
    score is kept. ``graph_lasso`` is then run on the empirical covariance
    of ``X`` at that alpha, reusing ``gl.mode``, ``gl.max_iter`` and five
    times ``gl.tol``.

    Parameters
    ----------
    gl : object
        Must expose ``cv_alphas_``, ``mode``, ``tol`` and ``max_iter``.
    X : array
        Observations passed to ``EmpiricalCovariance().fit`` and
        ``graph_lasso_path``.
    TrueCov : array
        Covariance against which every candidate precision is scored.

    Returns
    -------
    array
        ``np.abs(toPartialCorr(precision_))`` for the refitted precision.
    """
    ep = EmpiricalCovariance().fit(X)
    emp_cov = ep.covariance_
    _, precs = graph_lasso_path(X, gl.cv_alphas_)

    best_score = -np.inf
    best_ind = 0
    for i in range(len(gl.cv_alphas_)):
        try:
            this_score = log_likelihood(TrueCov, precs[i])
            # Implausibly large scores are discarded: NaN never compares
            # greater than the current best.
            if this_score >= .1 / np.finfo(np.float64).eps:
                this_score = np.nan
            if this_score > best_score:
                best_score = this_score
                best_ind = i
        except Exception:
            # Best effort: a candidate that cannot be scored is skipped.
            # ``Exception`` (not a bare except) keeps Ctrl-C working.
            print('exited: %s' % best_score)
            continue

    _, precision_, _ = graph_lasso(
        emp_cov, alpha=gl.cv_alphas_[best_ind], mode=gl.mode,
        tol=gl.tol * 5., max_iter=gl.max_iter, return_n_iter=True)
    return np.abs(toPartialCorr(precision_))
def getPrecision(D, Z, rho):
    """M step in EM algorithm - get sparse precision matrices!

    For every cluster, the per-dataset covariances are averaged with the
    weights in the matching column of ``Z`` and the result is passed to
    ``graph_lasso``; the returned precision matrix is stored.

    INPUT:
        - D: list of datasets
        - Z: latent allocation matrix
        - rho: regulariation parameter

    OUTPUT:
        - array of shape (K, p, p) with one precision matrix per cluster,
          where K = Z.shape[1] and p = D[0].shape[1]

    """
    K = Z.shape[1]  # number of clusters
    p = D[0].shape[1]  # number of ROIs

    # The covariance of a dataset does not depend on the cluster, so it is
    # computed once here instead of once per cluster.
    dataset_covs = [numpy.cov(D[j], rowvar=0, bias=1) for j in range(len(D))]

    newTheta = numpy.zeros((K, p, p))
    for i in range(K):
        # weighted covariance for the ith cluster
        S = numpy.zeros((p, p))
        for j, cov_j in enumerate(dataset_covs):
            S += cov_j * Z[j, i]
        S /= Z[:, i].sum()
        newTheta[i, :, :] = graph_lasso(emp_cov=S, alpha=rho)[1]

    return newTheta
def UpdateSubject(self, subjectCount=None):
    """
    Update parameters based on given subject

    Runs EM for one new subject: the E step computes the subject's cluster
    responsibilities from the current precisions (``self.Theta``) and mixing
    weights (``self.w``); the M step folds the subject's covariance into
    each cluster covariance and re-estimates the precisions with
    ``graph_lasso``. Once the summed absolute change in ``self.Theta`` drops
    below ``self.tol`` the stored statistics (``S``, ``w``, ``Z``, ``C``,
    ``SubjectCount``) are updated. If ``self.max_iter`` iterations pass
    without convergence, ``self.Theta`` keeps its last value but the stored
    statistics are left untouched.

    Input:
        - subjectCount: which element from self.Files should be added next.
          If is None, then self.SubjectCount is used

    """
    if subjectCount is None:
        subjectCount = self.SubjectCount

    if self.verbose:
        print("Updating with Subject: " + str(subjectCount))

    if self.FileType == 'str':
        # load in subjects data:
        newData = scale(loadData(self.Files[subjectCount]))
    else:
        # data already loaded
        newData = scale(self.Files[subjectCount])

    # The subject's covariance is constant across EM iterations.
    newCov = numpy.cov(newData, rowvar=0, bias=1)

    convergence = False
    iter_ = 0
    ThetaOld = numpy.copy(self.Theta)
    p = self.Theta.shape[1]  # number of nodes
    K = self.Theta.shape[0]  # number of clusters

    while (not convergence) and (iter_ < self.max_iter):
        # -- E step (i.e., choose which cluster this subject belongs to):
        wNew = numpy.array([DlogLike(newData, Pres=self.Theta[x, :, :])
                            for x in range(K)])
        wNew -= max(wNew)  # center around maximum (avoids overflow in exp)
        wNew = numpy.exp(wNew) * self.w  # weight by mixing probabilities
        wNew /= sum(wNew)  # normalise

        wUpdate = wNew + self.w  # updated mixing distribution (unnormalised!)

        # -- M step (i.e., update precision estimates for each cluster)
        Snew = numpy.zeros((K, p, p))
        for k in range(K):
            Snew[k, :, :] = (self.S[k, :, :] * self.w[k]
                             + wNew[k] * newCov) / wUpdate[k]
            self.Theta[k, :, :] = graph_lasso(emp_cov=Snew[k, :, :],
                                              alpha=self.rho)[1]

        # check convergence:
        if abs(self.Theta - ThetaOld).sum() < self.tol:
            convergence = True
            # Store the covariance of EVERY cluster. The previous loop
            # iterated over ``i`` but indexed with a stale ``k``, so only
            # the last cluster was ever updated.
            for k in range(K):
                self.S[k, :, :] = numpy.copy(Snew[k, :, :])
            self.w = numpy.copy(wUpdate)
            self.Z = numpy.vstack((self.Z, wNew))
            self.SubjectCount += 1  # keep track of which subject we are on
            self.C = numpy.hstack((self.C, wNew.argmax()))
        else:
            iter_ += 1
            ThetaOld = numpy.copy(self.Theta)
def fit(self, balance_param=0.5, sparsity_param=0.01, verbose=False):
    '''
    Update ``self.M`` with the covariance returned by graph_lasso.

    balance_param: weight of ``self.loss_matrix`` relative to the
        pseudo-inverse of the current ``self.M``
    sparsity_param: second positional argument of graph_lasso
        (trades off between optimizer and sparseness)
    verbose: forwarded to graph_lasso
    '''
    prior_inverse = pinvh(self.M)
    P = prior_inverse + balance_param * self.loss_matrix
    inverse_P = pinvh(P)
    # hack: multiplying by the transpose ensures positive semidefiniteness
    emp_cov = inverse_P.T.dot(inverse_P)
    covariance, _ = graph_lasso(emp_cov, sparsity_param, verbose=verbose)
    self.M = covariance
def _learn_sparse_gaussian(data, l1_penalty_range, l1_search_depth,
                           l1_search_repeat, atol):
    '''
    Fit one sparse Gaussian Graphical Model with the graphical lasso.

    The L1 penalty is chosen heuristically: columns of a copy of the data
    are shuffled independently, graph_lasso is run on the shuffled data and
    a conservative binary search narrows the penalty range depending on
    whether the non-zero pattern of the precision equals the identity. The
    search is repeated and the resulting penalties are averaged.

    :param data: array of shape(n_samples, n_dimensions)
    :param l1_penalty_range: min and max values for l1_penalty search
    :param l1_search_depth: number of depth in binary search for l1_penalty
    :param l1_search_repeat: number of trials for l1_penalty search
    :param atol: absolute tolerance for recovering zero or non-zero entries
        in precision matrix
    :return: covariance matrix, precision matrix, final l1_penalty
    '''
    n_frames, n_vars = data.shape
    shuffled = data.copy()  # working copy, shuffled in place repeatedly

    penalty_total = 0.
    for _ in range(l1_search_repeat):
        lower, upper = l1_penalty_range
        middle = (lower + upper) / 2
        for _ in range(l1_search_depth):
            # permute every column independently
            for col in range(n_vars):
                order = np.random.permutation(n_frames)
                shuffled[:, col] = shuffled[order, col]
            _, precision = graph_lasso(np.cov(shuffled.T), middle)

            # conservative step: each bound moves only half-way to the middle
            nonzero = ~np.isclose(precision, 0, atol=atol)
            if (nonzero == np.eye(n_vars)).all():
                upper = (upper + middle) / 2
            else:
                lower = (lower + middle) / 2
            middle = (lower + upper) / 2
        penalty_total += middle
    l1_penalty = penalty_total / l1_search_repeat

    # final graphical lasso on the original (unshuffled) data
    _, pre = graph_lasso(np.cov(data.T), l1_penalty)
    pre[np.abs(pre) < atol] = 0.
    cov = np.linalg.inv(pre)
    return cov, pre, l1_penalty
def fit(self, X, W, verbose=False):
    """
    Prepare the inputs, then replace ``self.M`` with the covariance
    estimated by graph_lasso.

    X: data matrix, (n x d)
    W: connectivity graph, (n x n). +1 for positive pairs, -1 for negative.
    verbose: forwarded to graph_lasso

    Returns self.
    """
    self._prepare_inputs(X, W)
    penalized = pinvh(self.M) + self.balance_param * self.loss_matrix
    inverse = pinvh(penalized)
    # hack: ensure positive semidefinite
    emp_cov = inverse.T.dot(inverse)
    estimate, _ = graph_lasso(emp_cov, self.sparsity_param, verbose=verbose)
    self.M = estimate
    return self
def fit(self, X, W):
    """
    Prepare the inputs, then store the graph_lasso covariance in ``self.M``.

    X: data matrix, (n x d)
       each row corresponds to a single instance
    W: connectivity graph, (n x n).
       +1 for positive pairs, -1 for negative.

    Reads 'balance_param', 'sparsity_param' and 'verbose' from
    ``self.params``. Returns self.
    """
    self._prepare_inputs(X, W)
    weighted_loss = self.params['balance_param'] * self.loss_matrix
    P = pinvh(self.M) + weighted_loss
    P_inv = pinvh(P)
    # hack: ensure positive semidefinite
    emp_cov = P_inv.T.dot(P_inv)
    covariance, _ = graph_lasso(emp_cov, self.params['sparsity_param'],
                                verbose=self.params['verbose'])
    self.M = covariance
    return self
def fit(self, X, W):
    """
    Fit the model: ``self.M`` becomes the covariance returned by graph_lasso.

    X: data matrix, (n x d)
       each row corresponds to a single instance
    W: connectivity graph, (n x n)
       +1 for positive pairs, -1 for negative.

    The settings 'balance_param', 'sparsity_param' and 'verbose' are looked
    up in ``self.params``. Returns self.
    """
    self._prepare_inputs(X, W)
    settings = self.params
    P = pinvh(self.M) + settings['balance_param'] * self.loss_matrix
    emp_cov = pinvh(P)
    # hack: ensure positive semidefinite
    emp_cov = emp_cov.T.dot(emp_cov)
    new_M, _ = graph_lasso(
        emp_cov,
        settings['sparsity_param'],
        verbose=settings['verbose'],
    )
    self.M = new_M
    return self
def myglasso(X_list, ix_product, set_length):
    """Run graph_lasso along a log-spaced path of penalties.

    The path has ``set_length`` values between 5% of ``alpha_max`` of the
    empirical covariance and ``alpha_max`` itself. Each fit is warm-started
    (``cov_init``) from the covariance of the previous one.

    Parameters
    ----------
    X_list : nested sequence
        ``X_list[class_ix][time_ix]`` is the data block to analyse; its
        transpose is handed to ``genEmpCov``.
    ix_product : tuple
        ``(class_ix, time_ix)``.
    set_length : int
        Number of penalty values.

    Returns
    -------
    list
        The precision matrix estimated for each penalty, in path order.
    """
    class_ix, time_ix = ix_product
    alpha_max_ct = alpha_max(genEmpCov(X_list[class_ix][time_ix].T))
    alpha_set = np.logspace(np.log10(alpha_max_ct * 5e-2),
                            np.log10(alpha_max_ct), set_length)

    # The empirical covariance does not depend on alpha, so it is computed
    # once rather than inside the loop (assumes genEmpCov is deterministic).
    emp_cov = genEmpCov(X_list[class_ix][time_ix].T)

    cov_last = None
    result = []
    for alpha in alpha_set:
        ml_glasso = cov.graph_lasso(emp_cov, alpha, cov_init=cov_last,
                                    verbose=True)
        cov_last = ml_glasso[0]
        result.append(ml_glasso[1])
    return result
def fit(self, X, W=None):
    '''
    Learn the metric ``self.M`` and the matching whitening transforms.

    X: data matrix, (n x d)
       each row corresponds to a single instance.
       The column means are stored in ``self.mean_`` and subtracted here.
    W: connectivity graph
       +1 for positive pairs, -1 for negative.
       Defaults to a matrix of ones with X.shape[1] rows and columns.

    Sets ``self.mean_``, ``self.M``, ``self.dewhiten_`` (Cholesky factor of
    M) and ``self.whiten_`` (its inverse).
    '''
    print('SDML.fit ...', numpy.shape(X))
    self.mean_ = numpy.mean(X, axis=0)
    X = numpy.matrix(X - self.mean_)
    n_columns = X.shape[1]

    # prior metric: data covariance or identity
    if self.use_cov:
        prior = np.cov(X.T)
    else:
        prior = np.identity(n_columns)

    if W is None:
        W = np.ones((n_columns, n_columns))

    # NOTE(review): X.dot(L.T) below needs L to have X.shape[1] columns,
    # which matches the default W but not an (n x n) graph -- confirm the
    # intended shape of W.
    L = laplacian(W, normed=False)
    loss_matrix = X.dot(L.T).T.dot(X)

    P = pinvh(prior) + self.balance_param * loss_matrix
    emp_cov = pinvh(P)
    # hack: ensure positive semidefinite
    emp_cov = emp_cov.T.dot(emp_cov)
    learned, _ = graph_lasso(emp_cov, self.sparsity_param,
                             verbose=self.verbose)
    self.M = learned

    factor = numpy.linalg.cholesky(self.M)
    self.dewhiten_ = factor
    self.whiten_ = numpy.linalg.inv(factor)

    print('SDML.fit done')
def test_graph_lasso(random_state=0):
    """Compare the 'cd' and 'lars' modes of graph_lasso and the estimator."""
    # Draw observations from a Gaussian with a sparse precision matrix
    n_features = 20
    n_obs = 100
    rng = check_random_state(random_state)
    true_prec = make_sparse_spd_matrix(n_features, alpha=.95,
                                       random_state=rng)
    true_cov = linalg.inv(true_prec)
    X = rng.multivariate_normal(np.zeros(n_features), true_cov, size=n_obs)
    emp_cov = empirical_covariance(X)

    for alpha in (0., .1, .25):
        covs = {}
        icovs = {}
        for method in ('cd', 'lars'):
            cov_, icov_, costs = graph_lasso(emp_cov, alpha=alpha,
                                             mode=method, return_costs=True)
            covs[method] = cov_
            icovs[method] = icov_
            costs, dual_gap = np.array(costs).T
            # Costs must decrease at every step (doesn't hold if alpha == 0)
            if alpha != 0:
                assert_array_less(np.diff(costs), 0)
        # The two solvers must agree on covariance and precision
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=3)
        assert_array_almost_equal(icovs['cd'], icovs['lars'], decimal=3)

    # Smoke test of the estimator class against the last alpha of the loop
    estimator = GraphLasso(alpha=.25).fit(X)
    estimator.score(X)
    for method in ('cd', 'lars'):
        assert_array_almost_equal(estimator.covariance_, covs[method],
                                  decimal=3)

    # On centered data, assume_centered=True and False must give the same
    # precision matrix.
    Z = X - X.mean(0)
    precs = [
        GraphLasso(assume_centered=flag).fit(Z).precision_
        for flag in (False, True)
    ]
    assert_array_almost_equal(precs[0], precs[1])
def test_graph_lasso_iris():
    """Compare graph_lasso on the iris data with hard-coded R values."""
    # Reference solution from the R glasso package for alpha=1.0
    # (need to set penalize.diagonal to FALSE)
    expected_cov = np.array([
        [0.68112222, 0.0000000, 0.265820, 0.02464314],
        [0.00000000, 0.1887129, 0.000000, 0.00000000],
        [0.26582000, 0.0000000, 3.095503, 0.28697200],
        [0.02464314, 0.0000000, 0.286972, 0.57713289],
    ])
    expected_icov = np.array([
        [1.5190747, 0.000000, -0.1304475, 0.0000000],
        [0.0000000, 5.299055, 0.0000000, 0.0000000],
        [-0.1304475, 0.000000, 0.3498624, -0.1683946],
        [0.0000000, 0.000000, -0.1683946, 1.8164353],
    ])

    iris = datasets.load_iris().data
    emp_cov = empirical_covariance(iris)
    for solver in ('cd', 'lars'):
        est_cov, est_icov = graph_lasso(emp_cov, alpha=1.0,
                                        return_costs=False, mode=solver)
        assert_array_almost_equal(est_cov, expected_cov)
        assert_array_almost_equal(est_icov, expected_icov)
def test_graph_lasso(random_state=0):
    """Check graph_lasso in both modes and smoke test GraphLasso."""
    # Observations come from a multivariate normal whose precision is sparse
    dim, n_samples = 20, 100
    rng = check_random_state(random_state)
    sparse_prec = make_sparse_spd_matrix(dim, alpha=.95, random_state=rng)
    dense_cov = linalg.inv(sparse_prec)
    X = rng.multivariate_normal(np.zeros(dim), dense_cov, size=n_samples)
    emp_cov = empirical_covariance(X)

    modes = ('cd', 'lars')
    for alpha in (0., .1, .25):
        covs = dict()
        icovs = dict()
        for mode in modes:
            cov_, icov_, costs = graph_lasso(emp_cov, alpha=alpha, mode=mode,
                                             return_costs=True)
            covs[mode] = cov_
            icovs[mode] = icov_
            costs, dual_gap = np.array(costs).T
            if alpha == 0:
                # monotone decrease of the cost doesn't hold if alpha == 0
                continue
            assert_array_less(np.diff(costs), 0)
        # Both approaches have to give similar results
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=4)
        assert_array_almost_equal(icovs['cd'], icovs['lars'], decimal=4)

    # Smoke test the estimator (compared with the last alpha of the loop)
    model = GraphLasso(alpha=.25).fit(X)
    model.score(X)
    assert_array_almost_equal(model.covariance_, covs['cd'], decimal=4)
    assert_array_almost_equal(model.covariance_, covs['lars'], decimal=4)

    # For centered data the value of assume_centered must not matter
    Z = X - X.mean(0)
    prec_not_assumed = GraphLasso(assume_centered=False).fit(Z).precision_
    prec_assumed = GraphLasso(assume_centered=True).fit(Z).precision_
    assert_array_almost_equal(prec_not_assumed, prec_assumed)
def test_graph_lasso_iris():
    """Check graph_lasso on iris against values from the R glasso package."""
    # Reference solution from the R glasso package for alpha=1.0.
    # The iris datasets in R and scikit-learn do not match in a few places;
    # these values are for the scikit-learn version.
    reference_cov = np.array([
        [0.68112222, 0.0, 0.2651911, 0.02467558],
        [0.00, 0.1867507, 0.0, 0.00],
        [0.26519111, 0.0, 3.0924249, 0.28774489],
        [0.02467558, 0.0, 0.2877449, 0.57853156],
    ])
    reference_icov = np.array([
        [1.5188780, 0.0, -0.1302515, 0.0],
        [0.0, 5.354733, 0.0, 0.0],
        [-0.1302515, 0.0, 0.3502322, -0.1686399],
        [0.0, 0.0, -0.1686399, 1.8123908],
    ])

    emp_cov = empirical_covariance(datasets.load_iris().data)
    for mode in ('cd', 'lars'):
        covariance, precision = graph_lasso(emp_cov, alpha=1.0,
                                            return_costs=False, mode=mode)
        assert_array_almost_equal(covariance, reference_cov)
        assert_array_almost_equal(precision, reference_icov)
def _fit(self, pairs, y):
    """Learn the metric from labelled pairs and store its transformer.

    The prior is either the pseudo-inverse of the covariance of the
    distinct points found in ``pairs`` (``self.use_cov``) or the identity.
    The loss matrix is built from the pair differences weighted by ``y``,
    combined with the prior using ``self.balance_param`` and handed to
    ``graph_lasso``; the returned precision matrix is converted with
    ``transformer_from_metric`` and stored in ``self.transformer_``.

    Parameters
    ----------
    pairs : array, indexed as (n_pairs, 2, n_features) after preparation
    y : array of pair labels, one per pair

    Returns
    -------
    self
    """
    pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples')

    # set up prior M
    if self.use_cov:
        # De-duplicate the points. np.vstack requires a sequence, so the
        # set is converted to a list first (passing a set directly is
        # deprecated / rejected by NumPy).
        unique_points = {tuple(row)
                         for row in pairs.reshape(-1, pairs.shape[2])}
        X = np.vstack(list(unique_points))
        M = pinvh(np.atleast_2d(np.cov(X, rowvar=False)))
    else:
        M = np.identity(pairs.shape[2])

    diff = pairs[:, 0] - pairs[:, 1]
    loss_matrix = (diff.T * y).dot(diff)
    P = M + self.balance_param * loss_matrix
    emp_cov = pinvh(P)
    # hack: ensure positive semidefinite
    emp_cov = emp_cov.T.dot(emp_cov)
    _, M = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose)

    self.transformer_ = transformer_from_metric(M)
    return self
def fit(self, X, W):
    """Learn the SDML model.

    The loss matrix returned by ``_prepare_inputs`` is scaled by
    ``balance_param`` and added to the current ``M_``; the precision matrix
    that ``graph_lasso`` returns for the resulting covariance becomes the
    new ``M_``.

    Parameters
    ----------
    X : array-like, shape (n, d)
        data matrix, where each row corresponds to a single instance
    W : array-like, shape (n, n)
        connectivity graph, with +1 for positive pairs and -1 for negative

    Returns
    -------
    self : object
        Returns the instance.
    """
    loss = self._prepare_inputs(X, W)
    combined = self.M_ + self.balance_param * loss
    inverse = pinvh(combined)
    # hack: ensure positive semidefinite
    emp_cov = inverse.T.dot(inverse)
    _, precision = graph_lasso(emp_cov, self.sparsity_param,
                               verbose=self.verbose)
    self.M_ = precision
    return self
def test_graph_lasso_iris():
    """graph_lasso on iris must reproduce the stored R glasso solution."""
    # Values below come from the R glasso package for alpha=1.0
    # (need to set penalize.diagonal to FALSE)
    r_covariance = np.array([
        [0.68112222, 0.0000000, 0.265820, 0.02464314],
        [0.00000000, 0.1887129, 0.000000, 0.00000000],
        [0.26582000, 0.0000000, 3.095503, 0.28697200],
        [0.02464314, 0.0000000, 0.286972, 0.57713289],
    ])
    r_precision = np.array([
        [1.5190747, 0.000000, -0.1304475, 0.0000000],
        [0.0000000, 5.299055, 0.0000000, 0.0000000],
        [-0.1304475, 0.000000, 0.3498624, -0.1683946],
        [0.0000000, 0.000000, -0.1683946, 1.8164353],
    ])

    data = datasets.load_iris().data
    emp_cov = empirical_covariance(data)
    for method in ('cd', 'lars'):
        fitted_cov, fitted_prec = graph_lasso(
            emp_cov, alpha=1.0, return_costs=False, mode=method)
        assert_array_almost_equal(fitted_cov, r_covariance)
        assert_array_almost_equal(fitted_prec, r_precision)
def _skeptic(X, lamb, return_precision=False, **kwargs):
    """Learn the structure of Markov random field with nonparanormal SKEPTIC
    using Spearman's rho [liu2012high]_.

    Each column of ``X`` is replaced by its centered ranks, the rank
    correlation ``rho`` is mapped to ``2 * sin(pi / 6 * rho)`` and the
    result (with a unit diagonal) is passed to ``graph_lasso``.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Observations of variables.

    lamb : float
        Regularization parameter.

    return_precision : bool, default False
        If True, the estimated precision matrix will be returned instead of
        adjacency matrix.

    **kwargs
        Accepted for signature compatibility; not used.

    Returns
    ----------
    adj : array, shape (n_features, n_features)
        Estimated adjacency matrix (or precision matrix if
        ``return_precision`` is True) of an MRF.

    References
    ----------
    .. [liu2012high] Liu, Han, et al. "High-dimensional semiparametric
        Gaussian copula graphical models." The Annals of Statistics 40.4
        (2012): 2293-2326.
    """
    n, d = X.shape
    indices = np.argsort(X, axis=0)
    # The centered ranks are half-integers when n is even, so the buffer
    # must be floating point; an integer buffer (the dtype of ``indices``)
    # would silently truncate them.
    rank = np.empty(indices.shape, dtype=float)
    centered_ranks = np.arange(1, n + 1) - (n + 1) / 2
    for r, idx in zip(rank.T, indices.T):
        # ``r`` is a view on one column of ``rank``
        r[idx] = centered_ranks

    # Spearman's rho: correlation of the centered ranks
    rho = rank.T @ rank
    stds = np.sqrt(np.diag(rho))
    rho = rho / stds.reshape(1, -1) / stds.reshape(-1, 1)

    cov = 2 * np.sin(np.pi / 6 * rho)
    cov[np.eye(d, dtype=bool)] = 1

    pre = graph_lasso(cov, lamb)[1]
    if return_precision:
        return pre
    # Edge wherever the precision entry is not (numerically) zero, without
    # self-loops.
    adj = ~np.isclose(pre, 0)
    adj[np.eye(len(adj), dtype=bool)] = 0
    return adj
def test_graph_lasso_iris():
    """Both graph_lasso modes must match the stored R solution on iris."""
    # Solution from the R glasso package for alpha=1.0.
    # The iris datasets in R and scikit-learn do not match in a few places;
    # these values are for the scikit-learn version.
    target_cov = np.array([
        [0.68112222, 0.0, 0.2651911, 0.02467558],
        [0.00, 0.1867507, 0.0, 0.00],
        [0.26519111, 0.0, 3.0924249, 0.28774489],
        [0.02467558, 0.0, 0.2877449, 0.57853156],
    ])
    target_icov = np.array([
        [1.5188780, 0.0, -0.1302515, 0.0],
        [0.0, 5.354733, 0.0, 0.0],
        [-0.1302515, 0.0, 0.3502322, -0.1686399],
        [0.0, 0.0, -0.1686399, 1.8123908],
    ])

    samples = datasets.load_iris().data
    emp_cov = empirical_covariance(samples)
    for method in ('cd', 'lars'):
        result_cov, result_icov = graph_lasso(
            emp_cov,
            alpha=1.0,
            return_costs=False,
            mode=method,
        )
        assert_array_almost_equal(result_cov, target_cov)
        assert_array_almost_equal(result_icov, target_icov)
def Shuheng_method(X_list, ix_product, set_length, sigma, width=5,
                   knownMean=False, m=0):
    """Run graph_lasso on a kernel covariance along a path of penalties.

    The penalty path has ``set_length`` log-spaced values between 5% of
    ``alpha_max`` (taken from the plain empirical covariance of the
    selected data block) and ``alpha_max``. The covariance being fitted is
    ``genEmpCov_kernel(time_ix, sigma, width, X_list[class_ix], knownMean,
    m)``; each fit is warm-started from the previous covariance.

    Parameters
    ----------
    X_list : nested sequence indexed as ``X_list[class_ix][time_ix]``
    ix_product : tuple ``(class_ix, time_ix)``
    set_length : int, number of penalty values
    sigma, width, knownMean, m : forwarded to ``genEmpCov_kernel``

    Returns
    -------
    list
        The precision matrix estimated for each penalty, in path order.
    """
    class_ix, time_ix = ix_product
    alpha_max_ct = alpha_max(genEmpCov(X_list[class_ix][time_ix].T))
    alpha_set = np.logspace(np.log10(alpha_max_ct * 5e-2),
                            np.log10(alpha_max_ct), set_length)

    # None of the kernel covariance inputs change with alpha, so it is
    # computed once instead of on every iteration (assumes
    # genEmpCov_kernel is deterministic).
    emp_cov = genEmpCov_kernel(time_ix, sigma, width, X_list[class_ix],
                               knownMean, m)

    cov_last = None
    result = []
    for alpha in alpha_set:
        ml_glasso = cov.graph_lasso(emp_cov, alpha, cov_init=cov_last,
                                    verbose=True)
        cov_last = ml_glasso[0]
        result.append(ml_glasso[1])
        print(alpha)
    return result
def windowed_fc(self, window_tp=60, step=1, sigma=20, method='corr'):
    """Compute sliding-window connectivity vectors for every subject.

    ``self.all_subs`` is read as consecutive blocks of ``self.tp`` rows,
    one block per subject. Each window of ``window_tp`` rows is tapered by
    a rectangle convolved with a Gaussian (std ``sigma``), a connectivity
    matrix is estimated, its diagonal is zeroed, it is vectorized with
    ``self.mat2vec`` and passed through ``arctanh``. The result is stored
    in ``self.covariance_mat`` with shape
    ``(n_sub, n_window - 1, n_components * (n_components - 1) // 2)``.

    Parameters
    ----------
    window_tp : int
        Number of rows per window.
    step : int
        Offset between the starts of consecutive windows.
    sigma : float
        Standard deviation of the Gaussian used for the taper.
    method : {'corr', 'part_corr', 'reg_pc', 'ridge_pc'}
        'corr': correlation coefficients.
        'part_corr': negated inverse covariance scaled by its diagonal.
        'reg_pc': same, with the precision estimated by ``graph_lasso`` on
        the covariance divided by its mean diagonal.
        'ridge_pc': same, inverting the rescaled covariance plus 0.1 * I.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names (previously the
        result was silently left at zero).
    """
    supported = ('corr', 'part_corr', 'reg_pc', 'ridge_pc')
    if method not in supported:
        raise ValueError(
            "unknown method %r; expected one of %s" % (method, supported))

    subs = self.all_subs
    overlap = window_tp - step
    n_ic_components = int(np.size(subs, 1))
    n_sub = int(np.size(subs, 0) / self.tp)
    n_window = int((self.tp - overlap) / (window_tp - overlap))
    n_pairs = (n_ic_components * (n_ic_components - 1)) // 2
    cov_mat = np.zeros((n_sub, n_window - 1, n_pairs))

    # ``gaussian`` lives in scipy.signal.windows; the scipy.signal alias
    # was removed in recent SciPy releases.
    gaus_win = signal.windows.gaussian(window_tp, std=sigma)
    rect = np.ones(window_tp)
    conv_gaus = signal.convolve(rect, gaus_win, 'same')
    conv_gaus = conv_gaus / conv_gaus.max()
    # One taper column per component; identical for every window.
    taper = np.tile(conv_gaus, (n_ic_components, 1)).T

    diagonal = np.eye(n_ic_components) > 0

    def _scale_by_diagonal(mat):
        # Divide entry (i, j) by sqrt(|m_ii|) and then by sqrt(|m_jj|).
        scale = np.sqrt(np.abs(np.diag(mat)))
        return (mat / scale[:, None]) / scale[None, :]

    def _connectivity(window):
        # Connectivity matrix of one tapered window for the chosen method.
        if method == 'corr':
            return np.corrcoef(window.T)
        if method == 'part_corr':
            return _scale_by_diagonal(-np.linalg.inv(np.cov(window.T)))
        if method == 'reg_pc':
            cov_true = np.cov(window.T)
            cov_true = cov_true / np.mean(np.diag(cov_true))
            _, precision = graph_lasso(cov_true, 0.00001, max_iter=500,
                                       tol=0.005)
            return -_scale_by_diagonal(precision)
        # method == 'ridge_pc'
        cov_true = np.cov(window.T)
        cov_true = cov_true / np.sqrt(np.mean(np.diag(np.square(cov_true))))
        ridge = 0.1 * np.eye(n_ic_components)
        return _scale_by_diagonal(-np.linalg.inv(cov_true + ridge))

    for j in range(n_sub):
        sub = subs[j * self.tp:(j + 1) * self.tp, :]
        for i in range(n_window - 1):
            start = i * (window_tp - overlap)
            sub_window = np.multiply(sub[start:start + window_tp, :], taper)
            conn = _connectivity(sub_window)
            conn[diagonal] = 0
            cov_mat[j, i, :] = np.arctanh(self.mat2vec(conn))

    self.covariance_mat = cov_mat
# F1_result_c.append(F1_result_t) # F1_result_list.append(F1_result_c) Theta_glasso_list = [] # 1 dim: class_ix; 2 dim: time_ix; 3 dim: alpha for class_ix in range(len_class): Theta_glasso_c = [] for time_ix in range(len_t): Theta_glasso_t = [] cov_last = None alpha_max_ct = alpha_max(genEmpCov(X_list[class_ix][time_ix].T)) alpha_set = np.logspace(np.log10(alpha_max_ct * 5e-2), np.log10(alpha_max_ct), set_length) for alpha in alpha_set: emp_cov = genEmpCov(X_list[class_ix][time_ix].T) ml_glasso = cov.graph_lasso(emp_cov, alpha, cov_init=cov_last, max_iter=500) cov_last = ml_glasso[0] Theta_glasso_t.append(ml_glasso[1]) Theta_glasso_c.append(Theta_glasso_t) Theta_glasso_list.append(Theta_glasso_c) #----------------------------------------------------------------------------------------------------- #----------------------------------- Shuheng's method ------------------------------------------------ beta = 0 indexOfPenalty = 1 set_length = 51 alpha_set = np.logspace(np.log10(0.9 * 5e-2), np.log10(0.9), set_length) Theta_Shuheng_list = [ ] # first dim: class_ix; second dim: alpha; third dim: time_ix
def RobustLassoFPReg(X, Z, p, nu, tol, lambda_beta=0, lambda_phi=0,
                     flag_rescale=0):
    """Robust regression: max-likelihood with Flexible Probabilities & shrinkage.

    (multivariate Student t distribution with given degrees of freedom = nu)

    Starting from an unpenalized ``LassoRegFP`` fit, the routine iterates:
    re-weight the observations from the current residuals, run
    ``LassoRegFP`` with the re-weighted probabilities, optionally shrink the
    residual covariance with ``graph_lasso``, and stop when the relative
    change of all three estimates is within tolerance or after 500
    iterations.

    INPUTS
     X             : [matrix] (n_ x t_end ) historical series of dependent
                     variables
     Z             : [matrix] (k_ x t_end) historical series of independent
                     variables
     p             : [vector] flexible probabilities; if None, observations
                     are equally weighted
     nu            : [scalar] multivariate Student's t degrees of freedom
     tol           : [scalar] or [vector] (3 x 1) tolerance, needed to check
                     convergence
     lambda_beta   : [scalar] lasso regression parameter
     lambda_phi    : [scalar] graphical lasso parameter (0 disables the
                     graphical lasso step)
     flag_rescale  : [boolean flag] if 0 (default), the series is not
                     rescaled

    OPS
     alpha_RMLFP   : [vector] (n_ x 1) shifting term
     beta_RMLFP    : [matrix] (n_ x k_) optimal loadings
     sig2_RMLFP    : [matrix] (n_ x n_) matrix of residuals' covariances
    """
    [n_, t_] = X.shape
    k_ = Z.shape[0]

    # if FP are not provided, observations are equally weighted
    if p is None:
        p = ones((1, t_)) / t_

    # a single float tolerance applies to alpha, beta and sig2 alike
    if isinstance(tol, float):
        tol = [tol, tol, tol]

    # rescale variables by their FP standard deviations
    if flag_rescale == 1:
        _, cov_Z = FPmeancov(Z, p)
        sig_Z = sqrt(diag(cov_Z))
        _, cov_X = FPmeancov(X, p)
        sig_X = sqrt(diag(cov_X))
        Z = np.diagflat(1 / sig_Z) @ Z
        X = np.diagflat(1 / sig_X) @ X

    # The last axis indexes the iteration; slices are appended as we go.
    alpha = zeros((n_, 1))
    beta = zeros((n_, k_, 1))
    sig2 = zeros((n_, n_, 1))

    # 0. Initialize with an unpenalized fit
    alpha[:, [0]], beta[:, :, [0]], sig2[:, :, [0]], U = LassoRegFP(
        X, Z, p, 0, 0)

    error = ones(3) * 10**6
    maxIter = 500
    i = 0
    while any(error > tol) and (i < maxIter):
        i = i + 1

        # 1. Update weights
        z2 = np.atleast_2d(U).T @ (solve(sig2[:, :, i - 1],
                                         np.atleast_2d(U)))
        w = (nu + n_) / (nu + diag(z2).T)

        # 2. Update FP
        p_tilde = (p * w) / npsum(p * w)

        # 3. Update output
        # Lasso regression
        new_alpha, new_beta, new_sig2, U = LassoRegFP(X, Z, p_tilde,
                                                      lambda_beta)
        new_beta = new_beta.reshape(n_, k_, 1)
        new_sig2 = new_sig2.reshape(n_, n_, 1)
        U = U.squeeze()
        alpha = r_['-1', alpha, new_alpha]
        beta = r_['-1', beta, new_beta]
        sig2 = r_['-1', sig2, new_sig2]
        sig2[:, :, i] = npsum(p * w) * sig2[:, :, i]

        # Graphical lasso
        if lambda_phi != 0:
            # Keep only the first returned item (the covariance). The
            # previous four-way unpacking fails whenever graph_lasso
            # returns fewer than four values, which is the case when no
            # extra outputs are requested (as here).
            sig2[:, :, i] = graph_lasso(sig2[:, :, i], lambda_phi)[0]

        # 4. Check convergence (relative change of each estimate)
        error[0] = norm(alpha[:, i] - alpha[:, i - 1]) / norm(
            alpha[:, i - 1])
        error[1] = norm(beta[:, :, i] - beta[:, :, i - 1], ord='fro') / norm(
            beta[:, :, i - 1], ord='fro')
        error[2] = norm(sig2[:, :, i] - sig2[:, :, i - 1], ord='fro') / norm(
            sig2[:, :, i - 1], ord='fro')

    # Output: estimates of the last iteration
    alpha_RMLFP = alpha[:, -1]
    beta_RMLFP = beta[:, :, -1]
    sig2_RMLFP = sig2[:, :, -1]

    # From rescaled variables to non-rescaled variables
    if flag_rescale == 1:
        alpha_RMLFP = diag(sig_X) @ alpha_RMLFP
        beta_RMLFP = diag(sig_X) @ beta_RMLFP @ diag(1 / sig_Z)
        sig2_RMLFP = diag(sig_X) @ sig2_RMLFP @ diag(sig_X).T

    return alpha_RMLFP, beta_RMLFP, sig2_RMLFP
def glasso(A, rho):
    """
    Run graph_lasso on the square matrix A with penalty rho (at most 100
    iterations) and return the first element of its result.
    """
    n_rows, n_cols = A.shape[0], A.shape[1]
    assert n_rows == n_cols, 'Mismatched dimensions of A'
    estimate = graph_lasso(A, rho, max_iter=100)
    return estimate[0]
import sys

sys.path.append("..")
sys.path.append("../inverse_covariance")

from sklearn.covariance import graph_lasso
from inverse_covariance import quic
import numpy as np

#############################################################################
# Examples 1 and 2
# For each data file, graph_lasso is attempted at the listed penalty; a
# FloatingPointError (graph_lasso failing to converge) is printed instead of
# aborting the script, and quic is then run at the same penalty.
EXAMPLES = (
    ("data/Mazumder_example1.txt", .004),
    ("data/Mazumder_example2.txt", .02),
)

for data_path, penalty in EXAMPLES:
    X = np.loadtxt(data_path, delimiter=",")
    Shat = np.cov(X, rowvar=0)
    try:
        graph_lasso(Shat, alpha=penalty)
    except FloatingPointError as e:
        print("{0}".format(e))
    vals = quic(Shat, penalty)
def sparse_metric(X, S, D, eta, alpha):
    """Return the graph_lasso covariance for the metric built from X, S, D.

    ``sparse_metric_as_prec`` supplies a precision matrix; its
    pseudo-inverse is used as the empirical covariance for ``graph_lasso``
    (run verbosely with penalty ``alpha``). Only the covariance part of the
    result is returned.
    """
    emp_cov = pinvh(sparse_metric_as_prec(X, S, D, eta=eta))
    result_cov, _ = graph_lasso(emp_cov, alpha, verbose=True)
    return result_cov
""" import numpy as np import numpy.linalg as alg import sklearn.covariance as cov from myfunctions import * from posdef import * lam1 = 0.5 lam2 = 0.5 lam_vec = [lam1, lam2, lam2, lam2, lam2] #initialize Omega_list = [ cov.graph_lasso(S_pd_list[k], lam_vec[k], verbose=False)[1] for k in range(K + 1) ] A = np.zeros([p, p]) for k in range(K): A = A + Omega_list[k] A_inv = alg.inv(A) likelihood_K = 0 for k in range(K): likelihood_K = likelihood_K + np.log(alg.det( Omega_list[k + 1])) - np.trace( np.matmul( S_Y[k * p + np.array(range(p))[:, None], k * p + np.array(range(p))[None, :]], Omega_list[k + 1]))
def EM_xie_time_varying(S_Y, p, K, set_length):
    """Fit K+1 sparse precision matrices over a grid of (lam1, lam2) penalties.

    Parameters
    ----------
    S_Y : array
        Empirical covariance, indexed below as a K-by-K grid of p-by-p blocks
        (block (m, l) is rows m*p..(m+1)*p-1, columns l*p..(l+1)*p-1).
    p : int
        Side length of each block.
    K : int
        Number of diagonal blocks. K == 1 makes the initial averaging divide
        by zero.
    set_length : int
        Number of values in each of the two penalty grids.

    Returns
    -------
    list
        ``set_length`` lists (one per lam1 value), each holding ``set_length``
        entries (one per lam2 value). Each entry is a list of K+1 matrices
        taken from element [1] of the ``graph_lasso`` result (the precision
        estimate in scikit-learn's return order -- TODO confirm against the
        installed version).
    """
    # S_Y empirical covariance
    # set_length: length for lambdas
    S_0 = np.zeros([p, p])  # Sigma_0
    # Sigma_0 starts as the average of all off-diagonal (m != l) blocks.
    for m in range(K):
        for l in [i for i in range(K) if i != m]:
            S_ml = S_Y[m * p + np.array(range(p))[:, None],
                       l * p + np.array(range(p))[None, :]]
            S_0 = S_0 + S_ml
    S_0 = S_0 / ((K - 1) * K)
    # Each block-specific covariance is its diagonal block minus Sigma_0.
    S_K_list = []
    for k in range(K):
        S_k = S_Y[k * p + np.array(range(p))[:, None],
                  k * p + np.array(range(p))[None, :]]
        S_k = S_k - S_0
        S_K_list.append(S_k)
    S_hat_list = [S_0] + S_K_list
    S_pd0_list = []  # closest PSD
    # nearestPD is defined elsewhere; presumably it projects each matrix onto
    # the nearest positive-(semi)definite one -- verify against its source.
    for k in range(K + 1):
        S_pd0_list.append(nearestPD(S_hat_list[k]))
    # alpha_max is defined elsewhere; presumably the penalty above which the
    # estimate is fully sparse -- verify.
    lam_max_vec = [alpha_max(item) for item in S_pd0_list]
    lam1_max = lam_max_vec[0]
    lam2_max = max(lam_max_vec[1:])
    # Log-spaced grids from lam_max down to lam_max / 2 ([::-1] makes them
    # descending).
    lam1_vec = np.logspace(np.log10(lam1_max * 5e-1), np.log10(lam1_max),
                           set_length)[::-1]
    lam2_vec = np.logspace(np.log10(lam2_max * 5e-1), np.log10(lam2_max),
                           set_length)[::-1]
    # product() is row-major: entry set_length * i + j is
    # (lam1_vec[i], lam2_vec[j]).
    lam_product = itertools.product(lam1_vec, lam2_vec)
    lam_product_list = list(lam_product)
    Omega_grid_list = []
    for i in range(set_length):
        # Penalties for the initial fit: lam1_vec[i] and the first lam2 value.
        lam1_0 = lam_product_list[set_length * i][0]
        lam2_0 = lam_product_list[set_length * i][1]
        # One penalty for the shared matrix, lam2 repeated for the K others.
        lam_vec = [lam1_0] + list(np.repeat(lam2_0, K))
        # initialize: independent graphical lasso on each initial covariance
        Omega_list = [
            cov.graph_lasso(S_pd0_list[k], lam_vec[k], verbose=False)[1]
            for k in range(K + 1)
        ]
        # NOTE(review): this sums Omega_list[0..K-1], so the last matrix
        # Omega_list[K] is left out -- confirm range(K) vs range(K + 1).
        A = np.zeros([p, p])
        for k in range(K):
            A = A + Omega_list[k]
        A_inv = alg.inv(A)
        # Penalized objective of the initial estimates.
        likelihood_K = 0
        for k in range(K):
            likelihood_K = likelihood_K + np.log(alg.det(
                Omega_list[k + 1])) - np.trace(
                    np.matmul(
                        S_Y[k * p + np.array(range(p))[:, None],
                            k * p + np.array(range(p))[None, :]],
                        Omega_list[k + 1]))
        tr_OSOA = 0
        for m in range(K):
            for l in range(K):
                OSOA = np.matmul(
                    np.matmul(
                        np.matmul(
                            Omega_list[m + 1],
                            S_Y[m * p + np.array(range(p))[:, None],
                                l * p + np.array(range(p))[None, :]]),
                        Omega_list[l + 1]), A_inv)
                tr_OSOA = tr_OSOA + np.trace(OSOA)
        likelihood = likelihood_K + np.log(alg.det(Omega_list[0])) - np.log(
            alg.det(A)) + tr_OSOA
        # l1 penalty on the off-diagonal entries only (tril(-1) + triu(1)).
        penalty = 0
        for k in range(K + 1):
            penalty = penalty + lam_vec[k] * np.sum(
                np.abs(np.tril(Omega_list[k], -1) + np.triu(Omega_list[k], 1)))
        pen_likelihood = likelihood - penalty
        Omega_lam1_list = []
        for j in range(set_length):
            lam2 = lam_product_list[set_length * i + j][1]
            lam_vec = [lam1_0] + list(np.repeat(lam2, K))
            # Omega_list, A_inv and pen_likelihood carry over from the previous
            # j, so each lam2 starts from the previous solution.
            # NOTE(review): resetting the reference value to 0 skips the loop
            # entirely when |pen_likelihood| <= 1e-4 -- confirm this is intended.
            pen_likelihood0 = 0
            # Iterate until the penalized likelihood changes by at most 1e-4.
            while np.abs(pen_likelihood - pen_likelihood0) > 1e-4:
                # Rebuild the K+1 covariance inputs from the current estimates
                # (presumably the E-step of the algorithm named in the title).
                OSO = np.zeros([p, p])
                S_K_list = []
                for m in range(K):
                    SO = np.zeros([p, p])
                    OS = np.zeros([p, p])
                    for l in range(K):
                        S_ml = S_Y[m * p + np.array(range(p))[:, None],
                                   l * p + np.array(range(p))[None, :]]
                        S_lm = S_Y[l * p + np.array(range(p))[:, None],
                                   m * p + np.array(range(p))[None, :]]
                        SO = SO + np.matmul(S_ml, Omega_list[l + 1])
                        OS = OS + np.matmul(Omega_list[l + 1], S_lm)
                        OSO = OSO + np.matmul(
                            np.matmul(Omega_list[m + 1], S_ml),
                            Omega_list[l + 1])
                    S_mm = S_Y[m * p + np.array(range(p))[:, None],
                               m * p + np.array(range(p))[None, :]]
                    S_k = S_mm - np.matmul(SO, A_inv) - np.matmul(A_inv, OS)
                    S_K_list.append(S_k)
                S_0 = A_inv + np.matmul(np.matmul(A_inv, OSO), A_inv)
                S_K_list = [item + S_0 for item in S_K_list]
                S_pd_list = [S_0] + S_K_list
                # M: refit every precision matrix with the graphical lasso
                Omega_list = [
                    cov.graph_lasso(S_pd_list[k], lam_vec[k], verbose=False)[1]
                    for k in range(K + 1)
                ]
                # likelihood: same objective as in the initialization above
                A = np.zeros([p, p])
                for k in range(K):
                    A = A + Omega_list[k]
                A_inv = alg.inv(A)
                likelihood_K = 0
                for k in range(K):
                    likelihood_K = likelihood_K + np.log(
                        alg.det(Omega_list[k + 1])) - np.trace(
                            np.matmul(
                                S_Y[k * p + np.array(range(p))[:, None],
                                    k * p + np.array(range(p))[None, :]],
                                Omega_list[k + 1]))
                tr_OSOA = 0
                for m in range(K):
                    for l in range(K):
                        OSOA = np.matmul(
                            np.matmul(
                                np.matmul(
                                    Omega_list[m + 1],
                                    S_Y[m * p + np.array(range(p))[:, None],
                                        l * p + np.array(range(p))[None, :]]),
                                Omega_list[l + 1]), A_inv)
                        tr_OSOA = tr_OSOA + np.trace(OSOA)
                likelihood = likelihood_K + np.log(alg.det(
                    Omega_list[0])) - np.log(alg.det(A)) + tr_OSOA
                penalty = 0
                for k in range(K + 1):
                    penalty = penalty + lam_vec[k] * np.sum(
                        np.abs(
                            np.tril(Omega_list[k], -1) +
                            np.triu(Omega_list[k], 1)))
                # Shift the previous value before storing the new one.
                pen_likelihood0 = pen_likelihood
                pen_likelihood = likelihood - penalty
                print(pen_likelihood)
            Omega_lam1_list.append(Omega_list)
            # Progress trace: indices of the grid point just finished.
            print([i, j])
        Omega_grid_list.append(Omega_lam1_list)
    return Omega_grid_list
def fit(self, X):
    '''
    Fit a Gaussian copula whose marginals are estimated by Gaussian KDE.

    Every column is mapped to normal scores: the KDE cumulative marginal is
    normalised by the KDE mass over the observed range, truncated to
    [theta, 1 - theta] with theta = 1 / N, and passed through the inverse
    normal CDF. The dependence structure is then estimated according to
    ``self.method``:

    * ``'mle'``, ``'glasso'``, ``'ledoit_wolf'``, ``'spectral'`` set
      ``self.est_cov``, ``self.corr`` and ``self.precision_``;
    * ``'pc'`` and ``'ic'`` set ``self.conditional_independences_`` from the
      (optionally scaled) data.

    Finally ``self.fit_structure(self.precision_)`` is called.

    :param X: input dataset, 2-D array of shape (N, d)
    :return: None; results are stored on the instance (``self.kernels``,
             ``self.theta``, ``self.precision_``, ...)
    '''
    N, d = X.shape
    X_scale = X if self.scaler is None else self.scaler.fit_transform(X)
    if len(self.vertexes) == 0:
        self.vertexes = [str(col) for col in range(d)]
    self.theta = 1.0 / N
    # Explicit float buffers: np.zeros_like(X) would inherit an integer dtype
    # from integer data and truncate every marginal and normal score.
    cum_marginals = np.zeros((N, d), dtype=float)
    inv_norm_cdf = np.zeros((N, d), dtype=float)
    self.kernels = []
    # TODO: complexity O(Nd) is high
    if self.verbose:
        print(colored('>> Computing marginals', color='blue'))
    upper = 1 - self.theta
    for j in range(d):
        column = X_scale[:, j]
        kernel = gaussian_kde(column)
        self.kernels.append(kernel)
        # Column extremes are loop invariants; compute them once per column.
        col_min = column.min()
        cum_pdf_overall = kernel.integrate_box_1d(col_min, column.max())
        for i in range(N):
            mass = kernel.integrate_box_1d(col_min,
                                           X_scale[i, j]) / cum_pdf_overall
            # truncate cumulative marginals so the normal scores stay finite
            if mass < self.theta:
                mass = self.theta
            elif mass > upper:
                mass = upper
            cum_marginals[i, j] = mass
            # inverse of normal CDF: \Phi^{-1}(F_j(x))
            inv_norm_cdf[i, j] = norm.ppf(mass)

    if self.method == 'mle':
        # maximum-likelihood estimator
        empirical_cov = EmpiricalCovariance()
        empirical_cov.fit(inv_norm_cdf)
        if self.verbose:
            print(colored('>> Running MLE to estiamte precision matrix',
                          color='blue'))
        self.est_cov = empirical_cov.covariance_
        self.corr = scale_matrix(self.est_cov)
        self.precision_ = inv(empirical_cov.covariance_)
    elif self.method == 'glasso':
        if self.verbose:
            print(colored('>> Running glasso to estiamte precision matrix',
                          color='blue'))
        empirical_cov = EmpiricalCovariance()
        empirical_cov.fit(inv_norm_cdf)
        # shrunk covariance to avoid numerical instability
        shrunk_cov = shrunk_covariance(empirical_cov.covariance_,
                                       shrinkage=0.8)
        self.est_cov, self.precision_ = graph_lasso(emp_cov=shrunk_cov,
                                                    alpha=self.penalty,
                                                    verbose=self.verbose,
                                                    max_iter=self.max_iter)
        self.corr = scale_matrix(self.est_cov)
    elif self.method == 'ledoit_wolf':
        if self.verbose:
            print(colored(
                '>> Running ledoit_wolf to estiamte precision matrix',
                color='blue'))
        self.est_cov, _ = ledoit_wolf(inv_norm_cdf)
        self.corr = scale_matrix(self.est_cov)
        self.precision_ = linalg.inv(self.est_cov)
    elif self.method == 'spectral':
        # L2 method, from the paper "Inverse covariance estimation for high
        # dimension data in linear time and space", eq. (8).
        if self.verbose:
            print(colored(
                '>> Running Riccati to estiamte precision matrix',
                color='blue'))
        # TODO: note estimated cov is sample cov
        self.est_cov, self.precision_ = spectral(inv_norm_cdf,
                                                 rho=2 * self.penalty,
                                                 assume_centered=False)
        self.corr = scale_matrix(self.est_cov)
    elif self.method == 'pc':
        clf = pgmlearner.PGMLearner()
        # One {vertex name: value} record per observation.
        data_list = [
            {name: X_scale[row_id, col]
             for col, name in enumerate(self.vertexes)}
            for row_id in range(X_scale.shape[0])
        ]
        graph = clf.lg_constraint_estimatestruct(data=data_list,
                                                 pvalparam=self.pval,
                                                 bins=self.bins)
        dag = np.zeros(shape=(len(graph.V), len(graph.V)))
        for e in graph.E:
            dag[self.vertexes.index(e[0]), self.vertexes.index(e[1])] = 1
        self.conditional_independences_ = dag
    elif self.method == 'ic':
        df = dict()
        variable_types = dict()
        for j in range(X_scale.shape[1]):
            df[self.vertexes[j]] = X_scale[:, j]
            variable_types[self.vertexes[j]] = 'c'
        data = pd.DataFrame(df)
        # run the search
        ic_algorithm = IC(RobustRegressionTest, data, variable_types,
                          alpha=self.pval)
        graph = ic_algorithm.search()
        dag = np.zeros(shape=(X_scale.shape[1], X_scale.shape[1]))
        for e in graph.edges(data=True):
            src = self.vertexes.index(e[0])
            dst = self.vertexes.index(e[1])
            # Start with both directions set, then clear one of them when the
            # edge carries exactly one arrow head.
            dag[src, dst] = 1
            dag[dst, src] = 1
            arrows = set(e[2]['arrows'])
            if len(arrows) == 1:
                head = arrows.pop()
                if head == e[0]:
                    dag[src, dst] = 0
                if head == e[1]:
                    dag[dst, src] = 0
        self.conditional_independences_ = dag
    # finally we fit the structure
    # NOTE(review): the 'pc' and 'ic' branches never assign self.precision_,
    # so this call relies on a value set elsewhere -- confirm.
    self.fit_structure(self.precision_)
def glasso(X, alpha=1, w0=None, maxit=1000, rtol=1e-5, retall=False,
           verbosity='NONE'):
    r"""
    Learn a graph by promoting sparsity in the inverse covariance.

    This is done by solving

    .. math::
        \tilde{W} = \underset{W \succeq 0}{\text{arg}\min} \,
        -\log \det W + \text{tr}(SW) + \alpha\|W \|_{1,1},

    where :math:`S` is the empirical (sample) covariance matrix. See
    :func:`sklearn.covariance.graph_lasso` for the exact form of the penalty.

    Parameters
    ----------
    X : array_like
        An N-by-M data matrix of N variable observations in an M-dimensional
        space. The learned graph will have N nodes.
    alpha : float, optional
        Regularization parameter acting on the l1-norm.
    w0 : array_like, optional
        Initial point, forwarded to the solver as ``cov_init``. Must have the
        same N-by-N shape as the sample covariance. Defaults to a matrix of
        ones.
    maxit : int, optional
        Maximum number of iterations.
    rtol : float, optional
        Stopping criterion. If the dual gap goes below this value, iterations
        are stopped. See :func:`sklearn.covariance.graph_lasso`.
    retall : boolean
        Return solution and problem details.
    verbosity : {'NONE', 'ALL'}, optional
        Level of verbosity of the solver. See
        :func:`sklearn.covariance.graph_lasso`.

    Returns
    -------
    W : array_like
        Learned inverse covariance matrix.
    problem : dict, optional
        Information about the solution of the optimization. Only returned if
        ``retall`` is true.

    Raises
    ------
    ValueError
        If ``w0`` does not have the shape of the sample covariance.

    Notes
    -----
    This function uses the solver :func:`sklearn.covariance.graph_lasso`.
    """
    # Sample covariance; np.cov treats each row of X as one variable.
    S = np.cov(X)

    # Default and validate the initial point.
    if w0 is None:
        w0 = np.ones(S.shape)
    if w0.shape != S.shape:
        raise ValueError("w0 must be of dimension N-by-N.")

    # Solve, timing only the solver call.
    tstart = time.time()
    covariance, precision, costs, n_iter = graph_lasso(
        emp_cov=S,
        alpha=alpha,
        cov_init=w0,
        mode='cd',
        tol=rtol,
        max_iter=maxit,
        verbose=(verbosity == 'ALL'),
        return_costs=True,
        return_n_iter=True)
    elapsed = time.time() - tstart

    problem = {
        'sol': precision,
        'dual_sol': covariance,
        'solver': 'sklearn.covariance.graph_lasso',
        'crit': 'dual_gap',
        'niter': n_iter,
        'time': elapsed,
        # First column of the per-iteration (objective, dual gap) pairs.
        'objective': np.array(costs)[:, 0]
    }

    W = problem['sol']
    return (W, problem) if retall else W
def MarkovNet(sigma2, k, lambda_vec, tol=10**-14, opt=0):
    """Parsimonious Markov network shrinkage of a covariance matrix.

    The correlation matrix of ``sigma2`` is passed through the graphical
    lasso for every penalty in ``lambda_vec`` (sorted ascending). The first
    penalty whose inverse correlation has at least ``k`` entries smaller than
    ``tol`` in absolute value is selected.

    Parameters
    ----------
    sigma2 : array, shape (n_, n_)
        Input covariance matrix.
    k : scalar
        Number of null entries to be reached.
    lambda_vec : array, shape (l_,)
        Penalty values.
    tol : scalar
        Tolerance used to decide that an entry is null.
    opt : scalar
        If != 0, return the matrices computed for each penalty value instead
        of only the selected ones.

    Returns
    -------
    sigma2_bar : array, (n_, n_) or (n_, n_, l_) if opt != 0
        Shrunk covariance matrix.
    c2_bar : array, (n_, n_) or (n_, n_, l_) if opt != 0
        Shrunk correlation matrix.
    phi2_bar : array, (n_, n_) or (n_, n_, l_) if opt != 0
        Inverse of the shrunk correlation matrix ``c2_bar``.
    lambda_bar : scalar
        Selected penalty value; the largest one when the target is not
        reached.
    conv : int
        1 if the target of ``k`` null entries is reached, 0 otherwise.
    l_bar : int or None
        If opt != 0, index such that ``c2_bar[:, :, l_bar]`` and
        ``phi2_bar[:, :, l_bar]`` are the selected matrices; None otherwise.
    """
    lambda_vec = sort(lambda_vec)
    l_ = len(lambda_vec)
    c2_bar = zeros(sigma2.shape + (l_, ))
    phi2_bar = zeros(sigma2.shape + (l_, ))
    z = zeros(l_)

    # Compute correlation
    sigma_vec = sqrt(diag(sigma2))
    c2 = diagflat(1 / sigma_vec) @ sigma2 @ diagflat(1 / sigma_vec)

    for l in range(l_):
        lam = lambda_vec[l]
        # Perform Graphical Lasso; only the precision estimate is used
        _, invs2_tilde, *_ = graph_lasso(c2, lam)

        # Correlation extraction
        c2_tilde = eye(sigma2.shape[0]).dot(pinv(invs2_tilde))
        # Square root of the diagonal only: the same values as
        # diag(sqrt(c2_tilde)), without NaN warnings from negative
        # off-diagonal entries.
        std_tilde = sqrt(diag(c2_tilde))
        # estimated correlation matrix
        c2_bar[:, :, l] = diagflat(1 / std_tilde) @ c2_tilde @ diagflat(
            1 / std_tilde)
        # inverse correlation matrix
        phi2_bar[:, :, l] = diagflat(std_tilde) @ invs2_tilde @ diagflat(
            std_tilde)

        # Count the entries of the inverse correlation that are null
        z[l] = npsum(abs(phi2_bar[:, :, l]) < tol)

    # Selection
    index = where(z >= k)[0]
    if len(index) == 0:
        # Target of k null entries not reached: fall back to the last
        # (largest) penalty. The previous `index == []` comparison never
        # selected this branch, and its fallback then indexed an int.
        l_bar = l_ - 1
        conv = 0
    else:
        l_bar = index[0]
        conv = 1  # target of k null entries reached
    lambda_bar = lambda_vec[l_bar]

    # Output
    if opt == 0:
        c2_bar = c2_bar[:, :, l_bar]  # shrunk correlation
        phi2_bar = phi2_bar[:, :, l_bar]  # shrunk inverse correlation
        l_bar = None
        # shrunk covariance
        sigma2_bar = diagflat(sigma_vec) @ c2_bar @ diagflat(sigma_vec)
    else:
        sigma2_bar = zeros(sigma2.shape + (l_, ))
        for l in range(l_):
            sigma2_bar[:, :, l] = diagflat(
                sigma_vec) @ c2_bar[:, :, l] @ diagflat(sigma_vec)

    return sigma2_bar, c2_bar, phi2_bar, lambda_bar, conv, l_bar
def shrink_matrix(matrix):
    """Shrink a square DataFrame with the graphical lasso for the network graph.

    The frame's values are passed to ``graph_lasso`` (penalty 0.4,
    coordinate-descent mode, verbose). The first element of the result is
    returned as a DataFrame whose rows and columns are both labelled with
    ``matrix.index``.
    """
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` returns
    # the same ndarray on every pandas version.
    result = graph_lasso(matrix.values, 0.4, verbose=True, mode='cd')
    return pd.DataFrame(result[0], index=matrix.index, columns=matrix.index)