def main():
    """Chain-graph experiment: sample data, fit graphical lasso, plot scores.

    Steps, in order:
    1. Build a 16-dimensional ``Gaussian_Distribution`` of type 'chain' and
       print its ``invcov``.
    2. Draw ``n = 1000`` samples and print their sample mean and covariance.
    3. Fit ``GraphicalLassoCV``, show its precision as a heatmap and plot the
       mean cross-validation score per alpha.
    4. Fit a default ``GraphicalLasso`` and show its precision heatmap.
    5. Sweep a grid of alphas through ``cross_val_score_GLasso`` and plot the
       returned 'log_lik' and 'AIC' values.
    """
    ones_mean = torch.tensor(np.ones(16), dtype=torch.float32)
    ones_diag = torch.tensor(np.ones(16), dtype=torch.float32)
    population = Gaussian_Distribution(mean=ones_mean,
                                       diag=ones_diag,
                                       sub=0.3,
                                       type='chain',
                                       slash=1)
    truth = population.invcov.numpy()
    n = 1000
    _dim = population.dim  # read but not used further in this function
    print(truth)

    _dist, sample, _, S = population.generate(n, numpy_like=True)
    print(sample_mean(np.array(sample)))
    print(sample_cov(np.array(sample)))
    _S_inv = np.linalg.inv(S)  # computed but not used further

    # Seed NumPy's global RNG before the cross-validated fit.
    np.random.seed(0)
    cv_model = GraphicalLassoCV()
    cv_model.fit(np.array(sample))
    _cv_cov = cv_model.covariance_
    cv_prec = cv_model.precision_
    heatmap(cv_prec)

    # Mean CV score for every alpha tried, with the selected alpha marked.
    # NOTE(review): `grid_scores_` / `cv_alphas_` are reported as deprecated
    # in newer scikit-learn releases -- confirm against the pinned version.
    plt.figure(figsize=(4, 3))
    plt.axes([.2, .15, .75, .7])
    mean_scores = np.mean(cv_model.grid_scores_, axis=1)
    plt.plot(cv_model.cv_alphas_, mean_scores, 'o-')
    plt.axvline(cv_model.alpha_, color='.5')
    plt.title('Model selection')
    plt.ylabel('Cross-validation score')
    plt.xlabel('alpha')
    plt.show()
    print(cv_model.cv_alphas_, cv_model.grid_scores_)

    # Plain graphical lasso with its default alpha.
    plain_model = GraphicalLasso()
    plain_model.fit(sample)
    heatmap(plain_model.precision_, 0.055)

    # Manual alpha sweep: fine grid below 0.1, coarser grid up to 0.3.
    score = {'log_lik': [], 'AIC': []}
    fine_grid = np.arange(0, 0.1, 0.001)
    coarse_grid = np.arange(0.11, 0.3, 0.01)
    alpha_list = np.hstack((fine_grid, coarse_grid))
    data = np.array(sample)
    for alpha in alpha_list:
        fold_result = cross_val_score_GLasso(data, alpha=alpha)
        for key in ('log_lik', 'AIC'):
            score[key].append(fold_result[key])

    plt.plot(alpha_list, score['log_lik'], 'o-')
    plt.show()
    plt.plot(alpha_list, score['AIC'])
    plt.show()
def lasso(xs):
    '''Estimate a covariance matrix with cross-validated graphical lasso.

    Parameters
    ----------
    xs : array_like
        N samples of X.

    Returns
    -------
    C : array_like
        The ``covariance_`` attribute of the fitted ``GraphicalLassoCV``.

    Notes
    -----
    The regularisation weight is chosen by 3-fold cross-validation
    (``cv=3``). Graphical lasso looks for a sparse inverse covariance
    matrix, which is an extra assumption that does not necessarily follow
    from the covariance being Toeplitz.
    '''
    return GraphicalLassoCV(cv=3).fit(xs).covariance_
def create_prior_from_samples(self, samples):
    """Build a ``th_Mahalanobis`` prior from a set of samples.

    A ``GraphicalLassoCV`` model is fitted to the samples. The prior is
    constructed from the per-column sample mean, the Cholesky factor of the
    fitted precision matrix, and ``self.prefix``.
    """
    from sklearn.covariance import GraphicalLassoCV
    from numpy import asarray, linalg

    data = asarray(samples)
    estimator = GraphicalLassoCV()
    estimator.fit(data)

    center = data.mean(axis=0)
    precision_factor = linalg.cholesky(estimator.precision_)
    return th_Mahalanobis(center, precision_factor, self.prefix)
def get_optimal_cov_estimator(time_series):
    """Fit a graphical-lasso estimator to ``time_series`` with fallbacks.

    The attempts below are made in order and the first estimator whose
    ``fit`` succeeds is returned:

    1. ``GraphicalLassoCV(cv=5, assume_centered=True)``.
    2. The same estimator with ``max_iter=200`` and tolerances
       0.1, 0.01, 0.001 and 0.0001.
    3. ``GraphicalLasso(alpha, assume_centered=True)`` fitted on a shrunk
       empirical covariance, for shrinkage values starting at 0.8 in steps
       of 0.01 (below 0.99) and alphas ``10**-8 .. 10**-1``.

    Parameters
    ----------
    time_series : array-like
        Passed unchanged to the estimators' ``fit`` and to
        ``empirical_covariance``.

    Returns
    -------
    estimator or None
        The first successfully fitted estimator, or ``None`` when every
        attempt fails.

    Notes
    -----
    Only ``Exception`` subclasses are treated as fit failures, so
    ``KeyboardInterrupt`` and ``SystemExit`` propagate instead of being
    silently swallowed by the retry loops.
    """
    from sklearn.covariance import GraphicalLassoCV

    estimator = GraphicalLassoCV(cv=5, assume_centered=True)
    print("\nSearching for best Lasso estimator...\n")
    try:
        estimator.fit(time_series)
        return estimator
    except Exception:
        print("\nModel did not converge on first attempt. "
              "Varying tolerance...\n")

    # Second attempt: one pass over progressively tighter tolerances.
    for tol in (0.1, 0.01, 0.001, 0.0001):
        print(f"Tolerance={tol}")
        estimator = GraphicalLassoCV(cv=5, max_iter=200, tol=tol,
                                     assume_centered=True)
        try:
            estimator.fit(time_series)
            return estimator
        except Exception:
            continue

    # Defensive: a fit that raised should not have left fitted attributes
    # behind, but if it did, hand that estimator back as before.
    if hasattr(estimator, 'covariance_') or hasattr(estimator, 'precision_'):
        return estimator

    print("Unstable Lasso estimation. Applying shrinkage to empirical "
          "covariance...")
    from sklearn.covariance import (
        GraphicalLasso,
        empirical_covariance,
        shrunk_covariance,
    )

    try:
        emp_cov = empirical_covariance(time_series, assume_centered=True)
        alpha_range = 10.0**np.arange(-8, 0)
        for shrinkage in np.arange(0.8, 0.99, 0.01):
            print(f"Shrinkage={shrinkage}:")
            shrunk_cov = shrunk_covariance(emp_cov, shrinkage=shrinkage)
            for alpha in alpha_range:
                print(f"Auto-tuning alpha={alpha}...")
                estimator_shrunk = GraphicalLasso(alpha,
                                                  assume_centered=True)
                try:
                    # NOTE(review): the shrunk covariance matrix itself is
                    # passed to `fit` as if it were sample data; kept as in
                    # the original -- confirm this is intended.
                    estimator_shrunk.fit(shrunk_cov)
                    return estimator_shrunk
                except Exception:
                    continue
    except Exception:
        return None

    # Every shrinkage/alpha combination failed.
    return None
def _infer_network(self, data):
    """
    Infer the network and store it on ``self.graph``.

    A ``GraphicalLassoCV`` built from ``self.parameters`` is fitted on
    ``data.values``. Its precision matrix is converted with
    ``from_precision_matrix_partial_correlations`` and wrapped in a
    ``Graph`` whose adjacency is labelled by the columns of ``data``.

    Args:
        data (pd.DataFrame): data to be used for the inference.
    """
    entities = data.columns
    estimator = GraphicalLassoCV(**self.parameters)
    estimator.fit(data.values)

    partial_correlations = from_precision_matrix_partial_correlations(
        estimator.precision_)
    adjacency = pd.DataFrame(partial_correlations,
                             index=entities,
                             columns=entities)
    self.graph = Graph(adjacency=adjacency)
    logger.debug('inferred with {}'.format(self.method))
def getConnectome(imgPath=None,
                  atlasPath=None,
                  viewInBrowser=False,
                  displayCovMatrix=False):
    """
    Gets the connectome of a functional MRI scan

    imgPath -> absolute or relative path to the .nii file
    atlasPath -> download path for the reference MSDL atlas
    viewInBrowser (optional, default=False) -> if True, opens up an
        interactive viewer in the browser
    displayCovMatrix (optional, default=False) -> if True, plots the
        covariance matrix and the inverse covariance (precision) matrix

    Returns a tuple of shape (estimator, atlas)
    """
    # Fetch the reference atlas; its 'maps' entry is the atlas image.
    atlas = datasets.fetch_atlas_msdl(data_dir=atlasPath)
    maps_image = atlas['maps']

    # Extract the time series of the scan on the atlas maps.
    masker = NiftiMapsMasker(maps_img=maps_image,
                             standardize=True,
                             memory='nilearn_cache',
                             verbose=5)
    signals = masker.fit_transform(imgPath)

    # Sparse inverse covariance estimate of the connectome.
    estimator = GraphicalLassoCV()
    estimator.fit(signals)

    if displayCovMatrix:
        matrix_style = dict(labels=atlas['labels'],
                            figure=(9, 7),
                            vmax=1,
                            vmin=-1)
        plotting.plot_matrix(estimator.covariance_,
                             title='Covariance',
                             **matrix_style)
        plotting.plot_matrix(estimator.precision_,
                             title='Inverse covariance (Precision)',
                             **matrix_style)

    if viewInBrowser:
        region_coords = atlas.region_coords
        # The negated precision matrix is what gets displayed.
        viewer = plotting.view_connectome(-estimator.precision_,
                                          region_coords, '60.0%')
        viewer.open_in_browser()

    return (estimator, atlas)
def helper_graphical_lasso(X, theta_true, tf_names=None):
    """Fit graphical lasso on ``X`` and report recovery of ``theta_true``.

    The estimator is chosen from the module-level ``args``:
    ``GraphicalLassoCV`` when ``args.mode == 'cv'``, otherwise a
    ``GraphicalLasso`` configured with ``args.alpha_l1`` and ``args.mode``.

    Parameters
    ----------
    X : array-like
        Data passed to the estimator's ``fit``.
    theta_true : array-like
        Reference precision matrix; converted with ``np.array`` before being
        handed to ``report_metrics``.
    tf_names : sequence, optional
        When non-empty and ``args.USE_TF_NAMES == 'yes'``, the estimated
        precision is post-processed with ``postprocess_tf``. Defaults to
        ``None`` (treated like an empty sequence) rather than a shared
        mutable list.

    Returns
    -------
    list
        The metrics returned by ``report_metrics``, as a list.
    """
    # Estimate the covariance
    if args.mode == 'cv':
        model = GraphicalLassoCV()
    else:
        model = GraphicalLasso(alpha=args.alpha_l1,
                               mode=args.mode,
                               tol=1e-7,
                               enet_tol=1e-6,
                               max_iter=100,
                               verbose=False,
                               assume_centered=False)
    model.fit(X)
    prec_ = model.precision_

    has_tf_names = tf_names is not None and len(tf_names) != 0
    if args.USE_TF_NAMES == 'yes' and has_tf_names:
        prec_ = postprocess_tf(prec_, tf_names)

    recovery_metrics = report_metrics(np.array(theta_true), prec_)
    print(
        'GLASSO: FDR, TPR, FPR, SHD, nnz_true, nnz_pred, precision, recall, Fb, aupr, auc'
    )
    print('GLASSO: TEST: Recovery of true theta: ',
          *np.around(recovery_metrics, 3))
    return list(recovery_metrics)
# Invert the precision matrix and rescale by d = sqrt(diag(cov)) so that the
# covariance has a unit diagonal; `prec` is rescaled by the same factors so
# it stays the inverse of the rescaled `cov`.
cov = linalg.inv(prec)
d = np.sqrt(np.diag(cov))
cov /= d
cov /= d[:, np.newaxis]
prec *= d
prec *= d[:, np.newaxis]
# Draw the samples, then centre each column and scale it to unit std.
X = prng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)
X -= X.mean(axis=0)
X /= X.std(axis=0)

# #############################################################################
# Estimate the covariance
# Empirical covariance of the (already centred) data.
emp_cov = np.dot(X.T, X) / n_samples

# Graphical lasso with alpha selected by 5-fold cross-validation.
model = GraphicalLassoCV(cv=5)
model.fit(X)
cov_ = model.covariance_
prec_ = model.precision_

# Ledoit-Wolf shrinkage estimate and its inverse, for comparison.
lw_cov_, _ = ledoit_wolf(X)
lw_prec_ = linalg.inv(lw_cov_)

# #############################################################################
# Plot the results
plt.figure(figsize=(10, 6))
plt.subplots_adjust(left=0.02, right=0.98)

# plot the covariances
covs = [('Empirical', emp_cov), ('Ledoit-Wolf', lw_cov_),
        ('GraphicalLassoCV', cov_), ('True', cov)]
# Colour scale is taken from the largest GraphicalLassoCV covariance entry.
vmax = cov_.max()
print('time series has {0} samples'.format(timeseries.shape[0])) ############################################################################### # in which situation the graphical lasso **sparse inverse covariance** # estimator captures well the covariance **structure**. try: from sklearn.covariance import GraphicalLassoCV except ImportError: # for Scitkit-Learn < v0.20.0 from sklearn.covariance import GraphLassoCV as GraphicalLassoCV covariance_estimator = GraphicalLassoCV(cv=3, verbose=1) ############################################################################### # We just fit our regions signals into the `GraphicalLassoCV` object covariance_estimator.fit(timeseries) ############################################################################### # and get the ROI-to-ROI covariance matrix. matrix = covariance_estimator.covariance_ print('Covariance matrix has shape {0}.'.format(matrix.shape)) ############################################################################### # Plot matrix, graph, and strength # -------------------------------- # # We use `:func: nilearn.plotting.plot_matrix` to visualize our correlation matrix # and display the graph of connections with `nilearn.plotting.plot_connectome`. from nilearn import plotting plotting.plot_matrix(matrix,
def main():
    """Compare covariance/precision estimators and plot the results.

    Builds a 16-dimensional 'chain' ``Gaussian_Distribution`` and samples
    from it, then separately generates a 20-feature data set ``X`` from a
    random sparse precision matrix. Empirical, Ledoit-Wolf and
    ``GraphicalLassoCV`` estimates are drawn as heatmaps together with the
    true matrices, followed by the cross-validation score curve.

    NOTE(review): ``GraphicalLassoCV`` is fitted on ``sample`` (from
    ``population``) while the empirical / Ledoit-Wolf estimates and the
    'True' matrices come from ``X`` / ``cov`` / ``prec`` -- confirm that
    mixing the two data sets in one figure is intended.
    """
    mean = torch.tensor(np.ones(16), dtype=torch.float32)
    diag = torch.tensor(np.ones(16), dtype=torch.float32)
    population = Gaussian_Distribution(mean=mean,
                                       diag=diag,
                                       sub=0.25,
                                       type='chain',
                                       slash=1)
    truth = population.invcov.numpy()
    n = 1000
    d = population.dim  # overwritten below by the std-dev vector
    print(truth)
    dist, sample, _, S = population.generate(n, numpy_like=True)

    # #############################################################################
    # Generate the data
    n_samples = 60
    n_features = 20
    prng = np.random.RandomState(1)
    prec = make_sparse_spd_matrix(n_features,
                                  alpha=.98,
                                  smallest_coef=.4,
                                  largest_coef=.7,
                                  random_state=prng)
    # Rescale so the covariance has a unit diagonal; `prec` is rescaled by
    # the same factors so it stays the inverse of the rescaled `cov`.
    cov = linalg.inv(prec)
    d = np.sqrt(np.diag(cov))
    cov /= d
    cov /= d[:, np.newaxis]
    prec *= d
    prec *= d[:, np.newaxis]
    # Draw samples, then centre each column and scale it to unit std.
    X = prng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)
    X -= X.mean(axis=0)
    X /= X.std(axis=0)
    #prec = population.invcov

    # #############################################################################
    # Estimate the covariance
    emp_cov = np.dot(X.T, X) / n_samples

    # Fitted on `sample`, not on `X` (see the review note in the docstring).
    model = GraphicalLassoCV()
    model.fit(sample)
    cov_ = model.covariance_
    prec_ = model.precision_

    lw_cov_, _ = ledoit_wolf(X)
    lw_prec_ = linalg.inv(lw_cov_)

    # #############################################################################
    # Plot the results
    plt.figure(figsize=(10, 6))
    plt.subplots_adjust(left=0.02, right=0.98)

    # plot the covariances
    covs = [('Empirical', emp_cov), ('Ledoit-Wolf', lw_cov_),
            ('GraphicalLassoCV', cov_), ('True', cov)]
    vmax = cov_.max()
    for i, (name, this_cov) in enumerate(covs):
        # Top row of the 2x4 grid.
        plt.subplot(2, 4, i + 1)
        plt.imshow(this_cov,
                   interpolation='nearest',
                   vmin=-vmax,
                   vmax=vmax,
                   cmap=plt.cm.RdBu_r)
        plt.xticks(())
        plt.yticks(())
        plt.title('%s covariance' % name)

    # plot the precisions
    precs = [('Empirical', linalg.inv(emp_cov)), ('Ledoit-Wolf', lw_prec_),
             ('GraphicalLasso', prec_), ('True', prec)]
    vmax = .9 * prec_.max()
    for i, (name, this_prec) in enumerate(precs):
        # Bottom row of the 2x4 grid; exact zeros are masked out so they
        # show the axes background colour set below.
        ax = plt.subplot(2, 4, i + 5)
        plt.imshow(np.ma.masked_equal(this_prec, 0),
                   interpolation='nearest',
                   vmin=-vmax,
                   vmax=vmax,
                   cmap=plt.cm.RdBu_r)
        plt.xticks(())
        plt.yticks(())
        plt.title('%s precision' % name)
        # Use whichever background-colour setter this Axes object exposes.
        if hasattr(ax, 'set_facecolor'):
            ax.set_facecolor('.7')
        else:
            ax.set_axis_bgcolor('.7')

    # plot the model selection metric
    # NOTE(review): `grid_scores_` / `cv_alphas_` are reported as deprecated
    # in newer scikit-learn releases -- confirm against the pinned version.
    plt.figure(figsize=(4, 3))
    plt.axes([.2, .15, .75, .7])
    plt.plot(model.cv_alphas_, np.mean(model.grid_scores_, axis=1), 'o-')
    plt.axvline(model.alpha_, color='.5')
    plt.title('Model selection')
    plt.ylabel('Cross-validation score')
    plt.xlabel('alpha')

    plt.show()
vmax=max_precision, colorbar=False) if n == 0: plt.title("group-sparse\n$\\alpha=%.2f$" % gsc.alpha_) # Fit one graph lasso per subject try: from sklearn.covariance import GraphicalLassoCV except ImportError: # for Scitkit-Learn < v0.20.0 from sklearn.covariance import GraphLassoCV as GraphicalLassoCV gl = GraphicalLassoCV(verbose=1) for n, subject in enumerate(subjects[:n_displayed]): gl.fit(subject) ax = plt.subplot(n_displayed, 4, 4 * n + 3) max_precision = gl.precision_.max() plotting.plot_matrix(gl.precision_, axes=ax, vmin=-max_precision, vmax=max_precision, colorbar=False) if n == 0: plt.title("graph lasso") plt.ylabel("$\\alpha=%.2f$" % gl.alpha_) # Fit one graph lasso for all subjects at once import numpy as np gl.fit(np.concatenate(subjects))
def sparce_invcov(self,
                  df,
                  cols=None,
                  style="GraphLassoCV",
                  param=0.2,
                  layout="circular",
                  center=None,
                  figsize=(7, 7)):
    """
    Sparse inverse-covariance estimation; plots the graph and the matrix.

    df: input DataFrame; it is normalized with ``Utility().normalize`` and
        rows with NA are dropped before fitting
    cols: columns to calculate. If None, takes all numerical columns
    style: "GraphLassoCV"; any other value selects LedoitWolf
    param: Parameter to pass to fitting algorithm.
        If GraphLasso, =alpha; if LedoitWolf, =threshold on the absolute
        precision entries
    layout: choose between "circular", "spring", "shell"
        (any other value raises KeyError)
    center: Put a certain colname in the center of the graph
    figsize: size of the graph figure

    Returns the sparsity pattern as a DataFrame indexed by column name:
    1 on the diagonal, -1 where an edge was detected, 0 elsewhere.
    """
    new_df = Utility().normalize(df).dropna()  # Remove NA, normalize
    # `is None` rather than `== None`: comparing a pandas Index / ndarray
    # with `==` yields an array whose truth value is ambiguous.
    if cols is None:
        cols = df._get_numeric_data().columns
    data = new_df[cols]

    if style == "GraphLassoCV":
        # Both grid entries equal `param`, so the alpha is effectively fixed.
        model = GraphicalLassoCV(alphas=[param, param],
                                 cv=10,
                                 max_iter=5000)
        model.fit(data)
        edge_mask = model.precision_ != 0
    else:  # Style == LedoitWolf
        model = LedoitWolf()
        model.fit(data)
        edge_mask = np.abs(model.get_precision()) > param

    sparce_mat = np.zeros(np.shape(edge_mask))
    sparce_mat[edge_mask] = -1
    np.fill_diagonal(sparce_mat, 1)
    sparce_mat = pd.DataFrame(sparce_mat,
                              index=data.columns,
                              columns=data.columns)

    # NetworkX Graph
    plt.subplots(figsize=figsize)
    G = nx.from_pandas_adjacency(sparce_mat)
    layout_fn = {
        "circular": nx.drawing.circular_layout,
        "shell": nx.drawing.shell_layout,
        "spring": nx.drawing.spring_layout,
    }[layout]
    pos = layout_fn(G, scale=2)
    # Only move a node that actually exists; avoids adding a stray key
    # (e.g. None) to the position mapping.
    if center in pos:
        pos[center] = np.array([0, 0])

    # Every node uses the same fill colour; only the size singles out the
    # centre node.
    node_color = ['mintcream' for _ in G.nodes]
    node_size = [
        len(node) * 1500 if node == center else len(node) * 500
        for node in G.nodes()
    ]
    nodes = nx.draw_networkx_nodes(G,
                                   pos,
                                   node_shape='o',
                                   node_color=node_color,
                                   node_size=node_size)
    nodes.set_edgecolor('k')
    nx.draw_networkx_edges(G, pos, edge_color='r', width=2.0, alpha=0.8)
    nx.draw_networkx_labels(G, pos, font_weight='bold', font_size=10)
    plt.axis('off')
    plt.tight_layout()

    # Display precision matrix as heatmap
    _, ax = plt.subplots(figsize=(5, 5))
    sns.heatmap(sparce_mat,
                vmax=1,
                vmin=-1,
                linewidth=0.1,
                cmap=plt.cm.RdBu_r,
                cbar=False)
    # Explicit y-limits so the first and last heatmap rows are drawn in full.
    ax.set_ylim(sparce_mat.T.shape[0] - 1e-9, -1e-9)
    plt.title('Sparse Inverse Covariance')
    plt.show()
    return sparce_mat
def Bayes_fghorse(data, p, nBurnin=1e3, nIter=10e3):
    """Iteratively sample a block-structured precision matrix ``Theta``.

    The columns of ``data`` are treated as ``p`` groups of
    ``M = data.shape[1] / p`` columns each. ``Theta`` is initialised from a
    5-fold ``GraphicalLassoCV`` fit and then updated column by column for
    ``int(nBurnin) + int(nIter) + 1`` sweeps, together with the auxiliary
    quantities ``Lambda``, ``Nu``, ``tau_sq`` and ``zeta``.

    Parameters
    ----------
    data : ndarray
        Two-dimensional array; rows are counted as ``N`` and the columns are
        centred before use. Presumably ``data.shape[1]`` is a multiple of
        ``p`` -- TODO confirm against callers.
    p : int
        Number of column groups (blocks).
    nBurnin, nIter : number
        Converted with ``int``; sweeps with loop index ``it <= 0`` are
        discarded, those with ``it > 0`` are stored.

    Returns
    -------
    ndarray
        ``np.stack`` of the stored ``Theta`` values along axis 0.

    NOTE(review): the function name suggests a Bayesian functional graphical
    horseshoe sampler; the statistical model is not documented in this file.
    NOTE(review): the nesting of the "update F_norm / Lambda / Nu / tau /
    zeta" steps relative to the ``for i`` loop was reconstructed from
    collapsed source text -- verify against the original file.
    """

    ## blockwise Frobenius norm for precision matrix
    def F_norm(Theta, p, M):
        # matx is kron(I_p, ones(M)) transposed, so matx^T (Theta**2) matx
        # sums the squared entries of each block of Theta; the square root
        # of that sum is returned.
        matx = np.kron(np.diag(np.ones(p, dtype=np.float32)),
                       np.ones(M, dtype=np.float32)).transpose()
        matx = tf.constant(matx, dtype=tf.float32)
        Theta_F = tf.linalg.matmul(tf.linalg.matmul(matx,
                                                    tf.math.square(Theta),
                                                    transpose_a=True,
                                                    a_is_sparse=True),
                                   matx,
                                   b_is_sparse=True)
        return tf.math.sqrt(Theta_F)

    def sampleLambda(Theta_F, Nu, tau_sq):
        # Returns the reciprocal of one Gamma draw whose parameters are built
        # from Nu, tau_sq and the squared block norms (uses M from the
        # enclosing scope).
        gamma_lambda = tfd.Gamma(
            (1 + M**2) / 2,
            tf.math.divide(1, Nu) + tf.math.scalar_mul(
                1 / (2 * tau_sq), tf.math.square(Theta_F)))
        return tf.math.divide(1, gamma_lambda.sample(1)[0, :, :])

    def sampleNu(Lambda):
        # Returns the reciprocal of one Gamma(1, 1 + 1/Lambda) draw.
        Nu_gamma = tfd.Gamma(1, 1 + tf.math.divide(1, Lambda))
        return tf.math.divide(1, Nu_gamma.sample(1)[0, :, :])

    def permut(Mat, mat_p):
        # Computes mat_p @ Mat @ mat_p.
        return tf.linalg.matmul(tf.linalg.matmul(mat_p, Mat), mat_p)

    def parti(Mat, j):
        # Partition a matrix around index j:
        #   Mat11 - Mat with row j and column j removed
        #   Mat12 - column j of Mat with entry j removed (Mat21 is the same
        #           object)
        #   Mat22 - the scalar Mat[j, j]
        exclude_row = tf.concat([Mat[:j, :], Mat[(j + 1):, :]], axis=0)
        Mat11 = tf.concat([exclude_row[:, :j], exclude_row[:, (j + 1):]],
                          axis=1)
        Mat12 = tf.concat([Mat[:, j][:j], Mat[:, j][(j + 1):]], axis=0)
        Mat21 = Mat12
        Mat22 = Mat[j, j]
        return (Mat11, Mat12, Mat21, Mat22)

    def is_pos_def(x):
        # True when every eigenvalue of x is positive. Not called anywhere
        # in the body of this function.
        return np.all(np.linalg.eigvals(x) > 0)

    N = data.shape[0]
    M = int(data.shape[1] / p)
    ## centre the data (subtract the column means)
    data = data - np.mean(data, axis=0).reshape((1, p * M))
    # Scatter matrix of the centred data.
    S = np.matmul(data.transpose(), data)
    S = tf.constant(S)
    ### Use glasso with CV to get initial values:
    glasso_model = GraphicalLassoCV(cv=5)
    glasso_model.fit(data)
    # initial values
    Theta = glasso_model.precision_  ## you can also use identity as initial
    Theta = tf.constant(Theta, dtype=tf.float32)
    Theta_F = F_norm(Theta, p, M)
    tau_sq = 1
    zeta = 1
    Nu = tf.ones([p, p], dtype=tf.float32)
    Lambda = sampleLambda(Theta_F, Nu, tau_sq)
    lambda_ = 1  ## diagonal
    Nu = sampleNu(Lambda)
    samples = []
    # `it` runs from -int(nBurnin) to int(nIter) inclusive.
    for it in tqdm(range(-int(nBurnin), int(nIter) + 1, 1)):
        for i in range(p):
            ## create permutation matrix for exchanging the ith block and
            ## the pth (last) block: identity with columns i and p-1 swapped,
            ## expanded to full size via a Kronecker product with I_M
            m = np.diag(np.ones(p)).astype(np.float32)
            m[:, [i, p - 1]] = m[:, [p - 1, i]]
            m1 = tf.linalg.LinearOperatorFullMatrix(m)
            m2 = tf.linalg.LinearOperatorFullMatrix(
                np.diag(np.ones(M, dtype=np.float32)))
            mat_p = tf.linalg.LinearOperatorKronecker([m1, m2]).to_dense()
            # exchange the ith and pth node
            Theta_ = permut(Theta, mat_p)
            S_ = permut(S, mat_p)
            # Expand the p x p Lambda / Nu to full size by repeating each
            # entry over an M x M block, then apply the same permutation.
            m1 = tf.linalg.LinearOperatorFullMatrix(Lambda)
            m2 = tf.linalg.LinearOperatorFullMatrix(
                np.ones([M, M], dtype=np.float32))
            Lambda_mat = tf.linalg.LinearOperatorKronecker([m1,
                                                            m2]).to_dense()
            Lambda_ = permut(Lambda_mat, mat_p)
            m1 = tf.linalg.LinearOperatorFullMatrix(Nu)
            m2 = tf.linalg.LinearOperatorFullMatrix(
                np.ones([M, M], dtype=np.float32))
            Nu_mat = tf.linalg.LinearOperatorKronecker([m1, m2]).to_dense()
            Nu_ = permut(Nu_mat, mat_p)
            for j_ in range(int(M)):
                # Index of the j_-th column inside the last block.
                j = (p - 1) * M + j_
                ## partition matrices:
                (Theta11, Theta12, Theta21, Theta22) = parti(Theta_, j)
                # Only the leading (p-1)*M square of the inverse is kept.
                Theta11_inv = tf.linalg.inv(Theta11)[:(p - 1) * M, :(p - 1) *
                                                     M]
                (S11, S12, S21, S22) = parti(S_, j)
                (Lambda11, Lambda12, Lambda21, Lambda22) = parti(Lambda_, j)
                (Nu11, Nu12, Nu21, Nu22) = parti(Nu_, j)
                gamma = np.random.gamma(shape=N / 2 + 1,
                                        scale=2 / (S22 + lambda_**2))
                # Cholesky factor of the matrix that defines `mu` and `vee`
                # below via triangular-system solves.
                Ell = tf.linalg.cholesky(
                    (S22 + lambda_**2) * Theta11_inv +
                    tf.linalg.diag(1 / (tau_sq * Lambda12[:(p - 1) * M])))
                temp1 = tf.linalg.solve(
                    Ell, tf.expand_dims(-1 * S21[:(p - 1) * M], axis=1))
                mu = tf.linalg.solve(tf.transpose(Ell), temp1)
                # Standard-normal noise pushed through the transposed factor.
                vee = tf.linalg.solve(
                    tf.transpose(Ell),
                    tf.expand_dims(tf.constant(
                        np.random.normal(size=mu.shape[0]), dtype=np.float32),
                                   axis=1))
                beta = mu + vee
                # Within-block part of the new column: zero except for the
                # entry at position j_.
                aa = np.zeros(M, dtype=np.float32)
                aa[j_] = gamma + tf.math.reduce_sum(
                    beta * tf.linalg.matmul(Theta11_inv, beta))
                aa = tf.constant(aa)
                temp = tf.concat([beta, tf.expand_dims(aa, axis=1)], axis=0)
                ## update jth column and jth row of Theta_
                Theta_ = tf.concat([Theta_[:, :j], temp, Theta_[:, j + 1:]],
                                   axis=1)
                Theta_ = tf.concat(
                    [Theta_[:j, :],
                     tf.transpose(temp), Theta_[j + 1:, :]],
                    axis=0)
            # Apply the permutation again to map back to the original order.
            Theta = permut(Theta_, mat_p)
        ## update F_norm
        Theta_F = F_norm(Theta, p, M)
        # update Lambda
        Lambda = sampleLambda(Theta_F, Nu, tau_sq)
        # update Nu:
        Nu = sampleNu(Lambda)
        # update tau: sum of Theta_F**2 / Lambda over the strict upper
        # triangle (band_part keeps the upper triangle, set_diag zeroes the
        # diagonal)
        up_sum = tf.math.reduce_sum(
            tf.linalg.set_diag(
                tf.linalg.band_part(
                    tf.math.divide(tf.math.square(Theta_F), Lambda), 0, -1),
                np.zeros(p, dtype=np.float32)))
        scale_tau = 1 / (1 / zeta + up_sum.numpy() / 2)
        tau_sq = 1 / np.random.gamma(shape=(M**2 * p * (p - 1) + 2) / 4,
                                     scale=scale_tau)
        ## update zeta
        zeta = 1 / np.random.gamma(shape=1, scale=1 / (1 + 1 / tau_sq))
        # Keep only the sweeps after burn-in.
        if it > 0:
            samples.append(Theta)
    samples = np.stack(samples, axis=0)
    return samples
def main():
    """L0 proximal-gradient experiment on ``chain.csv`` vs. graphical lasso.

    Steps, in order:
    1. Build a 32-dimensional 'DIY' ``Gaussian_Distribution`` from a
       hand-written precision matrix and print its ``invcov``.
    2. Load ``chain.csv``, drop the first row and first column, z-score the
       data and compute its sample covariance.
    3. Fit ``ProxGrad_l0`` (FISTA) at alpha = 0.05 and show the result.
    4. Sweep a grid of alphas through ``cross_val_score_ProxGrad_l0`` and
       plot 'log_lik', 'AIC' and 'non_zero'.
    5. Refit at the alpha with the largest log-likelihood and at the alpha
       with the smallest AIC.
    6. Fit ``GraphicalLassoCV(tol=1e-8)`` for comparison.
    """
    mean = torch.tensor(np.zeros(32), dtype=torch.float32)
    diag = torch.tensor(np.ones(32), dtype=torch.float32)

    # Identity precision with three symmetric off-diagonal pairs at -0.5.
    X = torch.eye(32, dtype=torch.float32)
    for row, col in ((5, 10), (19, 20), (1, 31)):
        X[row, col] = -0.5
        X[col, row] = -0.5

    population = Gaussian_Distribution(mean=mean,
                                       diag=diag,
                                       sub=-0.2,
                                       type='DIY',
                                       slash=1,
                                       prec=X)
    truth = population.invcov.numpy()
    n = 400
    _dim = population.dim  # read but not used further in this function
    print(truth)

    frame = pd.read_csv("chain.csv")
    sample = z_score(frame.values[1:, 1:])
    emp_cov = sample_cov(sample)

    def fit_and_report(estimator, penalty):
        # Fit on the empirical covariance, show the precision heatmap and
        # print its L0 penalty.
        fitted_prec = estimator.fit_FISTA(emp_cov, penalty)
        heatmap(fitted_prec)
        print('nonzero:', L0_penal(fitted_prec))

    fit_and_report(ProxGrad_l0(), 0.05)

    # Alpha sweep: fine grid below 0.1, coarser grid up to 0.3.
    metric_names = ('log_lik', 'AIC', 'non_zero')
    score = {name: [] for name in metric_names}
    alpha_list = np.hstack((np.arange(1e-5, 0.1, 0.002),
                            np.arange(0.1, 0.3, 0.01)))
    for alpha in alpha_list:
        out_dict = cross_val_score_ProxGrad_l0(sample,
                                               alpha=alpha,
                                               type='FISTA')
        for name in metric_names:
            score[name].append(out_dict[name])

    for name in metric_names:
        plt.plot(alpha_list, score[name])
        plt.show()

    model = ProxGrad_l0()

    # Alpha with the largest log-likelihood (first one wins on ties).
    best_alpha = 0
    best_log_lik = -1e12
    for candidate, log_lik in zip(alpha_list, score['log_lik']):
        if log_lik > best_log_lik:
            best_alpha = candidate
            best_log_lik = log_lik
    print(best_alpha)
    fit_and_report(model, best_alpha)

    # Alpha with the smallest AIC (first one wins on ties).
    best_alpha = 0
    best_aic = 1e12
    for candidate, aic in zip(alpha_list, score['AIC']):
        if aic < best_aic:
            best_alpha = candidate
            best_aic = aic
    print(best_alpha)
    fit_and_report(model, best_alpha)

    # Reference: cross-validated graphical lasso on the same samples.
    glasso = GraphicalLassoCV(tol=1e-8)
    glasso.fit(sample)
    heatmap(glasso.precision_)
    print('nonzero:', L0_penal(glasso.precision_))
ns.fit(X) tpr, fpr, prec = nitk.methods.calculate_matrix_accuracy(K, ns.precision_) ns_f1[i] = nitk.methods.calculate_f1_score(tpr, prec) te = nitk.ThresholdEstimatorCV() te.fit(X) tpr, fpr, prec = nitk.methods.calculate_matrix_accuracy(K, te.covariance_) ts_f1[i] = nitk.methods.calculate_f1_score(tpr, prec) sc = nitk.SCIOColumnwiseCV() sc.fit(X) tpr, fpr, prec = nitk.methods.calculate_matrix_accuracy(K, sc.precision_) sc_f1[i] = nitk.methods.calculate_f1_score(tpr, prec) gl = GraphicalLassoCV() gl.fit(X) tpr, fpr, prec = nitk.methods.calculate_matrix_accuracy(K, gl.precision_) gl_f1[i] = nitk.methods.calculate_f1_score(tpr, prec) sli = nitk.ScaledLassoInference() sli.fit(X) tpr, fpr, prec = nitk.methods.calculate_matrix_accuracy(K, sli.precision_) sli_f1[i] = nitk.methods.calculate_f1_score(tpr, prec) cli = nitk.CLIMECV() cli.fit(X) tpr, fpr, prec = nitk.methods.calculate_matrix_accuracy(K, cli.precision_) cli_f1[i] = nitk.methods.calculate_f1_score(tpr, prec) print("Graphical Lasso & %s & %s & %6.3f $\pm$ %6.3f" % (p, n, np.mean(gl_f1), np.std(gl_f1))) print("Neighbourhood Selection Columnwise & %s & %s & %6.3f $\pm$ %6.3f" %
def main():
    """Plot one GraphicalLassoCV precision heatmap per 242-row window.

    Reads ``1999_2018_complete.csv`` from ``DATA_ROOT``, drops the
    'malaysia' and 'venezuela' columns, scales the values from the third
    column onwards along axis 1, and then, for 20 consecutive windows of 242
    rows, fits ``GraphicalLassoCV`` and shows its precision matrix titled
    with the year ``1999 + window index``.

    A side-by-side comparison plot (empirical / Ledoit-Wolf /
    GraphicalLassoCV) that was disabled in the original is not reproduced;
    the quantities it relied on are still computed.
    """
    window = 242
    first_year = 1999

    # 'Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'
    df = pd.read_csv(pt.join(DATA_ROOT, '1999_2018_complete.csv'))
    df.drop(['malaysia', 'venezuela'], axis=1, inplace=True)

    data = preprocessing.scale(df.values[:, 2:], axis=1)
    print(data.shape)

    # Empirical covariance over the whole data set (unused by the active
    # plotting code below).
    emp_cov = np.dot(data.T, data) / data.shape[0]

    for count in range(20):
        start = count * window
        temp_data = data[start:start + window]

        # Graphical lasso on this window.
        model = GraphicalLassoCV()
        model.fit(temp_data)
        cov_ = model.covariance_
        prec_ = model.precision_

        # Ledoit-Wolf on the full data set.
        lw_cov_, _ = ledoit_wolf(data)
        lw_prec_ = linalg.inv(lw_cov_)

        # Heatmap of the window's precision; exact zeros are masked so the
        # axes background shows through.
        colour_limit = .9 * prec_.max()
        plt.figure()
        ax = plt.subplot(1, 1, 1)
        plt.imshow(np.ma.masked_equal(prec_, 0),
                   interpolation='nearest',
                   vmin=-colour_limit,
                   vmax=colour_limit,
                   cmap=plt.cm.RdBu_r)
        plt.xticks(())
        plt.yticks(())
        plt.title('year: %d' % (first_year + count))
        # Use whichever background-colour setter this Axes object exposes.
        set_background = (ax.set_facecolor if hasattr(ax, 'set_facecolor')
                          else ax.set_axis_bgcolor)
        set_background('.7')
        plt.show()
ret_data, vol_data, comp_list = load_data.read_data() # Take first 1000 rows (days) as training set and rest as test set train_index = 1000 X_train = ret_data.iloc[0:train_index, :] X_test = ret_data.iloc[train_index:, :] #X = (X - X.mean())/X.std() num_firms = ret_data.shape[1] print("Number of firms: ", num_firms) cov, corr = X_train.cov(), X_train.corr() #print(cov) model = GraphicalLassoCV(cv=5) model.fit(X_train) cov_ = model.covariance_ prec_ = model.precision_ print("What are the alphs in CV: ", model.cv_alphas_) print("Optimal lambda parameter for graphical LASSO: ", model.alpha_) #lw_cov_, _ = ledoit_wolf(X_train) #lw_prec_ = linalg.inv(lw_cov_) # print("Lasso Covariance Matrix:\n", cov_) prec_ = model.precision_ # print("Lasso Inverse Covariance Matrix:\n", prec_) # print("Empirical Covariance Matrix:\n", cov) # Find inverse covariance matrix of empirical covariance
print(f"gradients: {gradients}") # update alpha alpha_grad = np.mean(gradients) diff = np.linalg.norm(alpha_grad) curr_alpha = gradient_update(curr_alpha, alpha_grad) curr_alpha = max(curr_alpha, 0) print(alpha_grad, curr_alpha) # s = prob(samples, new_alpha) return curr_alpha if __name__ == '__main__': import causaldag as cd from sklearn.covariance import GraphicalLassoCV d = cd.rand.directed_erdos(5, .5) g = cd.rand.rand_weights(d) samples = g.sample(100) cvgm_alpha = cvgm_glasso(samples, 1) print(cvgm_alpha) glcv = GraphicalLassoCV(alphas=[.1, .2, .3, .4, .5, .6, .7, .8, .9], cv=10) glcv.fit(samples) print(glcv.alpha_)
# (603, 41) 由此可以看出得到了603个交易日的数据 # 其中有41只股票被选出。 stock_dataset, selected_stocks = preprocess_data(batch_K_data, min_K_num=1100) print(stock_dataset.shape) """ 其他的9只股票因为不满足最小交易日的要求而被删除 这603个交易日是所有41只股票都在交易 都没有停牌的数据。 """ # 这是实际使用的股票列表 print(selected_stocks) # 对这41只股票进行聚类 edge_model = GraphicalLassoCV() edge_model.fit(stock_dataset) _, labels = affinity_propagation(edge_model.covariance_) n_labels = max(labels) """ labels里面是每只股票对应的类别标号 """ print('Stock Clusters: {}'.format(n_labels + 1)) """ 10 即得到10个类别 """ sz50_df2 = sz50_df.set_index('code') # print(sz50_df2) for i in range(n_labels + 1): # print('Cluster: {}----> stocks: {}'.format(i,','.join(np.array(selected_stocks)[labels==i]))) """
############################################################################## # Computing group-sparse precision matrices # ------------------------------------------ from nilearn.connectome import GroupSparseCovarianceCV gsc = GroupSparseCovarianceCV(verbose=2) gsc.fit(subject_time_series) try: from sklearn.covariance import GraphicalLassoCV except ImportError: # for Scitkit-Learn < v0.20.0 from sklearn.covariance import GraphLassoCV as GraphicalLassoCV gl = GraphicalLassoCV(verbose=2) gl.fit(np.concatenate(subject_time_series)) ############################################################################## # Displaying results # ------------------- atlas_img = msdl_atlas_dataset.maps atlas_region_coords = plotting.find_probabilistic_atlas_cut_coords(atlas_img) labels = msdl_atlas_dataset.labels plotting.plot_connectome(gl.covariance_, atlas_region_coords, edge_threshold='90%', title="Covariance", display_mode="lzr") plotting.plot_connectome(-gl.precision_, atlas_region_coords,
from sklearn.preprocessing import StandardScaler # loading the data infile = np.load('DataDynamicConn/Leiden_sub39335_Rt2_K200.npz') ts = infile['ts'] nodes = infile['nodes'] # correlation matrix R = np.corrcoef(ts, rowvar=False) # scaling the input data ts_std = StandardScaler().fit_transform(ts) # ICOV glasso = GraphicalLassoCV(cv=5) glasso.fit(ts_std) ICOV = glasso.covariance_ # visual inspection # zeroing the main diagonal for i in range(R.shape[0]): R[i, i] = 0 ICOV[i, i] = 0 plt.figure(figsize=[6, 3]) plt.subplot(121) plt.imshow(R, interpolation='nearest', vmin=-1, vmax=1, cmap=plt.cm.rainbow) plt.xticks(()) plt.yticks(()) plt.title('Correlation')
def get_conn_matrix(time_series, conn_model, dir_path, node_size, smooth,
                    dens_thresh, network, ID, roi, min_span_tree, disp_filt,
                    parc, prune, atlas_select, uatlas_select, label_names,
                    coords, c_boot, norm, binary):
    """Compute a connectivity matrix from ``time_series``.

    Parameters
    ----------
    time_series : array-like
        Data passed to the selected estimator.
    conn_model : str
        Estimator selector:

        * 'corr', 'cor', 'correlation' -- nilearn correlation
        * 'partcorr', 'parcorr', 'partialcorrelation' -- nilearn partial
          correlation
        * 'cov', 'covariance', 'covar' -- graphical-lasso covariance
        * 'sps', 'sparse', 'precision' -- negated graphical-lasso precision
        * 'QuicGraphicalLasso', 'QuicGraphLassoCV',
          'QuicGraphicalLassoEBIC', 'AdaptiveQuicGraphLasso' -- skggm
          estimators (negated precision)
    label_names, coords
        Converted to NumPy arrays before being returned.
    dir_path, node_size, smooth, dens_thresh, network, ID, roi,
    min_span_tree, disp_filt, parc, prune, atlas_select, uatlas_select,
    c_boot, norm, binary
        Not used here; returned unchanged.

    Returns
    -------
    tuple
        ``conn_matrix`` followed by every input except ``time_series``, in
        signature order.

    Raises
    ------
    ValueError
        If ``conn_model`` is not one of the names above.
    RuntimeError
        If neither graphical-lasso fit succeeded, or the resulting matrix
        shape compares below ``(2, 2)``.
    ImportError
        If a skggm model is requested but ``inverse_covariance`` cannot be
        imported (the explanatory message is still printed first).
    """
    from nilearn.connectome import ConnectivityMeasure
    from sklearn.covariance import GraphicalLassoCV

    covariance_models = ('cov', 'covariance', 'covar')
    precision_models = ('sps', 'sparse', 'precision')

    conn_matrix = None
    if conn_model in ('corr', 'cor', 'correlation'):
        # credit: nilearn
        print('\nComputing correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model in ('partcorr', 'parcorr', 'partialcorrelation'):
        # credit: nilearn
        print('\nComputing partial correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model in covariance_models or conn_model in precision_models:
        # Fit estimator to matrix to get sparse matrix
        estimator_shrunk = None
        estimator = GraphicalLassoCV(cv=5)
        try:
            print('\nComputing covariance...\n')
            estimator.fit(time_series)
        except Exception:
            print(
                'Unstable Lasso estimation--Attempting to re-run by first applying shrinkage...'
            )
            try:
                from sklearn.covariance import GraphicalLasso, empirical_covariance, shrunk_covariance
                emp_cov = empirical_covariance(time_series)
                alphaRange = 10.0**np.arange(-8, 0)
                # NOTE(review): as in the original, a successful fit only
                # ends the alpha search for the current shrinkage value; the
                # shrinkage loop keeps going, so the last successful fit is
                # the one used. Confirm whether stopping at the first
                # success was intended.
                for i in np.arange(0.8, 0.99, 0.01):
                    shrunk_cov = shrunk_covariance(emp_cov, shrinkage=i)
                    for alpha in alphaRange:
                        try:
                            candidate = GraphicalLasso(alpha)
                            candidate.fit(shrunk_cov)
                        except Exception:
                            print(
                                "Covariance estimation failed with shrinkage at alpha=%s"
                                % alpha)
                            continue
                        print(
                            "Retrying covariance matrix estimate with alpha=%s"
                            % alpha)
                        # Only a fitted estimator is ever kept, so an
                        # unfitted one cannot leak out of the loop.
                        estimator_shrunk = candidate
                        break
            except ValueError:
                print(
                    'Unstable Lasso estimation! Shrinkage failed. A different connectivity model may be needed.'
                )

        # Fail with the intended error when nothing could be fitted, rather
        # than with an AttributeError on a missing `precision_`.
        fitted = estimator if estimator_shrunk is None else estimator_shrunk
        if not hasattr(fitted, 'precision_'):
            raise RuntimeError('\nERROR: Covariance estimation failed.')

        if conn_model in precision_models:
            if estimator_shrunk is None:
                print(
                    '\nFetching precision matrix from covariance estimator...\n'
                )
                conn_matrix = -estimator.precision_
            else:
                print(
                    '\nFetching shrunk precision matrix from covariance estimator...\n'
                )
                conn_matrix = -estimator_shrunk.precision_
        else:
            if estimator_shrunk is None:
                print(
                    '\nFetching covariance matrix from covariance estimator...\n'
                )
                conn_matrix = estimator.covariance_
            else:
                conn_matrix = estimator_shrunk.covariance_
    elif conn_model == 'QuicGraphicalLasso':
        try:
            from inverse_covariance import QuicGraphicalLasso
        except ImportError:
            print('Cannot run QuicGraphLasso. Skggm not installed!')
            raise
        # Compute the sparse inverse covariance via QuicGraphLasso
        # credit: skggm
        model = QuicGraphicalLasso(init_method='cov',
                                   lam=0.5,
                                   mode='default',
                                   verbose=1)
        print('\nCalculating QuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphLassoCV':
        try:
            from inverse_covariance import QuicGraphicalLassoCV
        except ImportError:
            print('Cannot run QuicGraphLassoCV. Skggm not installed!')
            raise
        # Compute the sparse inverse covariance via QuicGraphLassoCV
        # credit: skggm
        model = QuicGraphicalLassoCV(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoCV precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphicalLassoEBIC':
        try:
            from inverse_covariance import QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run QuicGraphLassoEBIC. Skggm not installed!')
            raise
        # Compute the sparse inverse covariance via QuicGraphLassoEBIC
        # credit: skggm
        model = QuicGraphicalLassoEBIC(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoEBIC precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'AdaptiveQuicGraphLasso':
        try:
            from inverse_covariance import AdaptiveQuicGraphicalLasso, QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run AdaptiveGraphLasso. Skggm not installed!')
            raise
        # Compute the sparse inverse covariance via
        # AdaptiveGraphLasso + QuicGraphLassoEBIC + method='binary'
        # credit: skggm
        model = AdaptiveQuicGraphicalLasso(
            estimator=QuicGraphicalLassoEBIC(init_method='cov', ),
            method='binary',
        )
        print(
            '\nCalculating AdaptiveQuicGraphLasso precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.estimator_.precision_
    else:
        raise ValueError(
            '\nERROR! No connectivity model specified at runtime. Select a valid estimator using the '
            '-mod flag.')

    # Tuple comparison, kept as in the original.
    if conn_matrix.shape < (2, 2):
        raise RuntimeError(
            '\nERROR! Matrix estimation selection yielded an empty or 1-dimensional graph. '
            'Check time-series for errors or try using a different atlas')

    coords = np.array(coords)
    label_names = np.array(label_names)
    return conn_matrix, conn_model, dir_path, node_size, smooth, dens_thresh, network, ID, roi, min_span_tree, disp_filt, parc, prune, atlas_select, uatlas_select, label_names, coords, c_boot, norm, binary
memory='nilearn_cache', verbose=5) time_series = masker.fit_transform(data.func[0], confounds=data.confounds) ############################################################################## # Compute the sparse inverse covariance # -------------------------------------- try: from sklearn.covariance import GraphicalLassoCV except ImportError: # for Scitkit-Learn < v0.20.0 from sklearn.covariance import GraphLassoCV as GraphicalLassoCV estimator = GraphicalLassoCV() estimator.fit(time_series) ############################################################################## # Display the connectome matrix # ------------------------------ from nilearn import plotting # Display the covariance # The covariance can be found at estimator.covariance_ plotting.plot_matrix(estimator.covariance_, labels=labels, figure=(9, 7), vmax=1, vmin=-1, title='Covariance')
cov = linalg.inv(prec) d = np.sqrt(np.diag(cov)) cov /= d cov /= d[:, np.newaxis] prec *= d prec *= d[:, np.newaxis] X = prng.multivariate_normal(np.zeros(n_features), cov, size=n_samples) X -= X.mean(axis=0) X /= X.std(axis=0) # ############################################################################# # Estimate the covariance emp_cov = np.dot(X.T, X) / n_samples model = GraphicalLassoCV() model.fit(X) cov_ = model.covariance_ prec_ = model.precision_ lw_cov_, _ = ledoit_wolf(X) lw_prec_ = linalg.inv(lw_cov_) # ############################################################################# # Plot the results plt.figure(figsize=(10, 6)) plt.subplots_adjust(left=0.02, right=0.98) # plot the covariances covs = [ ("Empirical", emp_cov), ("Ledoit-Wolf", lw_cov_),
def Bayes_fglasso(data, p, regular_parm=None, lambda_shape=1, lambda_rate=0.01, nBurnin=1e3, nIter=10e3):
    """Sample precision matrices with a blockwise, column-at-a-time sampler.

    The columns of ``data`` are treated as ``p`` consecutive blocks of
    ``M = int(data.shape[1] / p)`` columns each. The chain is initialised from
    the precision matrix of a ``GraphicalLassoCV(cv=5)`` fit and then, for
    every iteration, each block is swapped into the last position, its ``M``
    columns of ``Theta`` are redrawn one at a time, and the block is swapped
    back. ``Tau`` and the blockwise norm ``Theta_F`` are refreshed once per
    iteration.

    Relies on ``np``, ``tf``, ``tfd``, ``tqdm`` and ``GraphicalLassoCV`` being
    available in the enclosing module.

    Parameters
    ----------
    data : array_like
        2-D array with one observation per row and ``p * M`` columns. It is
        centred column-wise inside the function.
    p : int
        Number of blocks.
    regular_parm : float, optional
        Regularisation parameter; enters the updates as ``regular_parm`` and
        ``regular_parm**2``.
        NOTE(review): with the default ``None`` the first ``sampleTau`` call
        below receives ``None`` before anything has been sampled, and once
        ``regular_parm`` has been assigned the later ``is None`` tests can no
        longer be true -- so it looks like lambda is drawn at most once and
        ``lambda_sq`` is never appended to. Confirm the intended behaviour.
    lambda_shape, lambda_rate : float
        Added to the shape and rate of the gamma draw in ``sampleLambda``.
    nBurnin, nIter : number
        The loop variable runs from ``-int(nBurnin)`` to ``int(nIter)``
        inclusive; only iterations with a positive index are stored.

    Returns
    -------
    samples : tf.Tensor
        The stored ``Theta`` draws stacked along axis 0.
    lambda_sq : list
        Values appended in the storing step (see the note on
        ``regular_parm``).
    """

    # Blockwise Frobenius norm of the precision matrix.
    def F_norm(Theta, p, M):
        """Return the (p, p) matrix of Frobenius norms of the M x M blocks of Theta."""
        # Block-indicator matrix of shape (p * M, p): column k is 1 on the
        # rows belonging to block k.
        matx = np.kron(np.diag(np.ones(p, dtype=np.float32)), np.ones(M, dtype=np.float32)).transpose()
        matx = tf.constant(matx, dtype=tf.float32)
        # matx^T @ Theta**2 @ matx sums the squared entries of each block.
        Theta_F = tf.linalg.matmul(tf.linalg.matmul(matx, tf.math.square(Theta), transpose_a=True, a_is_sparse=True), matx, b_is_sparse=True)
        return tf.math.sqrt(Theta_F)

    def sampleLambda(Theta, Tau_sq):
        """Draw one gamma variate from the current Theta and Tau_sq."""
        shape_new = lambda_shape + p * M + (M**2 + 1) * p * (p - 1) / 4
        # Rate = prior rate + half the trace of Theta + half the sum of the
        # strictly upper-triangular entries of Tau_sq (band_part keeps the
        # upper triangle, set_diag zeroes the diagonal, [0] drops the leading
        # sample axis).
        rate_new = lambda_rate + tf.math.reduce_sum(tf.linalg.diag_part(Theta)) / 2 + tf.math.reduce_sum(tf.linalg.set_diag(tf.linalg.band_part(Tau_sq, 0, -1)[0, :, :], np.zeros(p))) / 2
        # NOTE(review): the local is named lamb_sq but the caller stores the
        # result in regular_parm, which is later squared again -- confirm
        # whether a square root is missing.
        lamb_sq = np.random.gamma(shape=shape_new, scale=1 / rate_new)
        return lamb_sq

    def sampleTau(Theta_F, regular_parm):
        """Return the reciprocal of one inverse-Gaussian draw per block pair."""
        # Shift all norms when any is (near) zero so the loc below stays finite.
        if tf.math.reduce_min(Theta_F) < 1e-6:
            Theta_F = Theta_F + 1e-6
        # sample(1) adds a leading axis of length 1 to the result.
        tau_ = tfd.InverseGaussian(loc=tf.math.divide(regular_parm, Theta_F), concentration=regular_parm**2).sample(1)
        return tf.math.divide(1, tau_)

    def permut(Mat, mat_p):
        """Return mat_p @ Mat @ mat_p."""
        return tf.linalg.matmul(tf.linalg.matmul(mat_p, Mat), mat_p)

    def parti(Mat, j):
        """Partition Mat around index j.

        Returns Mat with row and column j removed, column j without its j-th
        entry (returned twice, as both off-diagonal parts), and the scalar
        Mat[j, j].
        """
        exclude_row = tf.concat([Mat[:j, :], Mat[(j + 1):, :]], axis=0)
        Mat11 = tf.concat([exclude_row[:, :j], exclude_row[:, (j + 1):]], axis=1)
        Mat12 = tf.concat([Mat[:, j][:j], Mat[:, j][(j + 1):]], axis=0)
        # Row j is not read separately; the column is reused for it.
        Mat21 = Mat12
        Mat22 = Mat[j, j]
        return (Mat11, Mat12, Mat21, Mat22)

    # def is_pos_def(x):
    #     return np.all(np.linalg.eigvals(x) > 0)

    N = data.shape[0]
    M = int(data.shape[1] / p)
    # Centre the data column-wise.
    data = data - np.mean(data, axis=0).reshape((1, p * M))
    # Scatter matrix of the centred data.
    S = np.matmul(data.transpose(), data)
    # NOTE(review): no dtype is given here while Theta and the permutation
    # matrices are float32 -- presumably data must already be float32 for
    # permut(S, mat_p) to type-check; confirm.
    S = tf.constant(S)
    # Use graphical lasso with cross-validation to get the initial values.
    glasso_model = GraphicalLassoCV(cv=5)  # the identity could also be used as the initial value
    glasso_model.fit(data)
    # Initial values.
    Theta = glasso_model.precision_
    Theta = tf.constant(Theta, dtype=tf.float32)
    Theta_F = F_norm(Theta, p, M)
    Tau_sq = sampleTau(Theta_F, regular_parm)
    # Expand the (p, p) Tau_sq to (p * M, p * M) by repeating each entry over
    # its M x M block (Kronecker product with a matrix of ones).
    o1 = tf.linalg.LinearOperatorFullMatrix(Tau_sq[0, :, :])
    o2 = tf.linalg.LinearOperatorFullMatrix(np.ones([M, M], dtype=np.float32))
    Tau = tf.linalg.LinearOperatorKronecker([o1, o2]).to_dense()
    samples = []
    lambda_sq = []
    for it in tqdm(range(-int(nBurnin), int(nIter) + 1, 1)):
        for i in range(p):
            # Permutation matrix that exchanges the i-th block with the last
            # (p-th) block.
            m = np.diag(np.ones(p)).astype(np.float32)
            m[:, [i, p - 1]] = m[:, [p - 1, i]]
            m1 = tf.linalg.LinearOperatorFullMatrix(m)
            m2 = tf.linalg.LinearOperatorFullMatrix(np.diag(np.ones(M, dtype=np.float32)))
            mat_p = tf.linalg.LinearOperatorKronecker([m1, m2]).to_dense()
            # Exchange the i-th and p-th node.
            Theta_ = permut(Theta, mat_p)
            S_ = permut(S, mat_p)
            Tau_ = permut(Tau, mat_p)
            # For every principal component (column) of the block now in last
            # position.
            for j_ in range(int(M)):
                j = (p - 1) * M + j_
                # Partition the matrices around column j.
                (Theta11, Theta12, Theta21, Theta22) = parti(Theta_, j)
                # Keep only the part of the inverse that belongs to the other
                # p - 1 blocks.
                Theta11_inv = tf.linalg.inv(Theta11)[:(p - 1) * M, :(p - 1) * M]
                (S11, S12, S21, S22) = parti(S_, j)
                (Tau11, Tau12, Tau21, Tau22) = parti(Tau_, j)
                gamma = np.random.gamma(shape=N / 2 + 1, scale=2 / (S22 + regular_parm**2))
                # Cholesky factor of the matrix used for the cross-block draw.
                Ell = tf.linalg.cholesky((S22 + regular_parm**2) * Theta11_inv + tf.linalg.diag(1 / Tau12[:(p - 1) * M]))
                # Two triangular-factor solves give mu = -(Ell Ell^T)^-1 S21.
                temp1 = tf.linalg.solve(Ell, tf.expand_dims(-1 * S21[:(p - 1) * M], axis=1))
                mu = tf.linalg.solve(tf.transpose(Ell), temp1)
                # Standard-normal noise pushed through Ell^-T.
                vee = tf.linalg.solve(tf.transpose(Ell), tf.expand_dims(tf.constant(np.random.normal(size=mu.shape[0]), dtype=np.float32), axis=1))
                beta = mu + vee
                # Within-block part of the new column: zero everywhere except
                # the diagonal entry at position j_.
                aa = np.zeros(M, dtype=np.float32)
                aa[j_] = gamma + tf.math.reduce_sum(beta * tf.linalg.matmul(Theta11_inv, beta))
                aa = tf.constant(aa)
                temp = tf.concat([beta, tf.expand_dims(aa, axis=1)], axis=0)
                # Update the j-th column and then the j-th row of Theta_.
                Theta_ = tf.concat([Theta_[:, :j], temp, Theta_[:, j + 1:]], axis=1)
                Theta_ = tf.concat([Theta_[:j, :], tf.transpose(temp), Theta_[j + 1:, :]], axis=0)
            # Swap the block back to its original position.
            Theta = permut(Theta_, mat_p)
        # Update Tau. Theta_F here is still the value computed at the end of
        # the previous iteration; it is only refreshed below.
        Tau_sq = sampleTau(Theta_F, regular_parm)
        o1 = tf.linalg.LinearOperatorFullMatrix(Tau_sq[0, :, :])
        o2 = tf.linalg.LinearOperatorFullMatrix(np.ones([M, M], dtype=np.float32))
        Tau_o = tf.linalg.LinearOperatorKronecker([o1, o2])
        Tau = Tau_o.to_dense()
        # Update Theta_F.
        Theta_F = F_norm(Theta, p, M)
        # Update lambda.
        # NOTE(review): this condition is false as soon as regular_parm has
        # been assigned once, so lambda is not redrawn on later iterations.
        if regular_parm is None:
            regular_parm = sampleLambda(Theta, Tau_sq)
        # Store the draw once the burn-in (non-positive indices) is over.
        if it > 0:
            samples.append(Theta)
            if regular_parm is None:
                lambda_sq.append(regular_parm)
    samples = tf.stack(samples, axis=0)
    return (samples, lambda_sq)
def _fit_sparse_covariance(time_series):
    """Fit a graphical-lasso estimator, retrying on shrunk covariances if needed.

    Returns ``(estimator, used_shrinkage)``. ``estimator`` is a fitted
    scikit-learn covariance estimator, or ``None`` when every attempt failed;
    ``used_shrinkage`` tells whether it came from the shrinkage fallback.
    """
    from sklearn.covariance import GraphicalLassoCV

    estimator = GraphicalLassoCV(cv=5)
    try:
        print('\nComputing covariance...\n')
        estimator.fit(time_series)
    except Exception:
        # Deliberately broad: an ill-conditioned input can surface as several
        # different numerical errors, and any of them should trigger the
        # shrinkage fallback. KeyboardInterrupt/SystemExit still propagate.
        print('Unstable Lasso estimation--Attempting to re-run by first applying shrinkage...')
    else:
        return estimator, False

    from sklearn.covariance import GraphicalLasso, empirical_covariance, shrunk_covariance

    fitted = None
    alpha_range = 10.0 ** np.arange(-8, 0)
    try:
        emp_cov = empirical_covariance(time_series)
        # NOTE(review): every shrinkage level is tried and the last one that
        # fits wins, which matches the previous results. Stopping at the
        # first success would be cheaper but would change the output.
        for shrinkage in np.arange(0.8, 0.99, 0.01):
            shrunk_cov = shrunk_covariance(emp_cov, shrinkage=shrinkage)
            for alpha in alpha_range:
                candidate = GraphicalLasso(alpha)
                try:
                    candidate.fit(shrunk_cov)
                except Exception:
                    print(f"Covariance estimation failed with shrinkage at alpha={alpha}")
                    continue
                print(f"Retrying covariance matrix estimate with alpha={alpha}")
                # Only keep estimators that actually finished fitting, so the
                # caller never receives an unfitted object.
                fitted = candidate
                break
    except ValueError:
        print('Unstable Lasso estimation! Shrinkage failed. A different connectivity model may be needed.')

    return fitted, True


def get_conn_matrix(time_series, conn_model, dir_path, node_size, smooth, dens_thresh, network, ID, roi,
                    min_span_tree, disp_filt, parc, prune, atlas, uatlas, labels, coords, norm, binary,
                    hpass, extract_strategy):
    """
    Computes a functional connectivity matrix based on a node-extracted
    time-series array. Includes a library of routines across Nilearn,
    scikit-learn, and skggm packages, among others.

    Only the backend needed by the selected ``conn_model`` is imported.

    Parameters
    ----------
    time_series : array
        2D m x n array consisting of the time-series signal for each ROI node
        where m = number of scans and n = number of ROI's.
    conn_model : str
        Connectivity estimation model. Accepted values are
        'corr'/'cor'/'correlation', 'partcorr'/'parcorr'/'partialcorrelation',
        'cov'/'covariance'/'covar', 'sps'/'sparse'/'precision',
        'QuicGraphicalLasso', 'QuicGraphicalLassoCV',
        'QuicGraphicalLassoEBIC' and 'AdaptiveQuicGraphicalLasso'.
    dir_path : str
        Path to directory containing subject derivative data for given run.
    node_size : int
        Spherical centroid node size in the case that coordinate-based
        centroids are used as ROI's.
    smooth : int
        Smoothing width (mm fwhm) to apply to time-series when extracting
        signal from ROI's.
    dens_thresh : bool
        Indicates whether a target graph density is to be used as the basis
        for thresholding.
    network : str
        Resting-state network based on Yeo-7 and Yeo-17 naming
        (e.g. 'Default') used to filter nodes in the study of brain subgraphs.
    ID : str
        A subject id or other unique identifier.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    min_span_tree : bool
        Indicates whether local thresholding from the Minimum Spanning Tree
        should be used.
    disp_filt : bool
        Indicates whether local thresholding using a disparity filter and
        'backbone network' should be used.
    parc : bool
        Indicates whether to use parcels instead of coordinates as ROI nodes.
    prune : bool
        Indicates whether to prune final graph of disconnected nodes/isolates.
    atlas : str
        Name of atlas parcellation used.
    uatlas : str
        File path to atlas parcellation Nifti1Image in MNI template space.
    labels : list
        List of string labels corresponding to ROI nodes.
    coords : list
        List of (x, y, z) tuples corresponding to a coordinate atlas used or
        which represent the center-of-mass of each parcellation node.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    hpass : bool
        High-pass filter values (Hz) to apply to node-extracted time-series.
    extract_strategy : str
        The name of a valid function used to reduce the time-series region
        extraction.

    Returns
    -------
    conn_matrix : array
        Symmetrized connectivity matrix. Precision-based models return the
        negated precision matrix.
    conn_model, dir_path, node_size, smooth, dens_thresh, network, ID, roi, \
    min_span_tree, disp_filt, parc, prune, atlas, uatlas
        Passed through unchanged, in this order.
    labels : array
        ``labels`` converted with ``np.array``.
    coords : array
        ``coords`` converted with ``np.array``.
    norm, binary, hpass, extract_strategy
        Passed through unchanged, in this order.

    Raises
    ------
    ValueError
        If ``conn_model`` is not one of the accepted values.
    ImportError
        If a skggm model is requested and skggm (``inverse_covariance``) is
        not installed.
    RuntimeError
        If covariance estimation fails even after shrinkage, or the resulting
        matrix is empty or has fewer than two nodes.

    References
    ----------
    .. [1] Varoquaux, G., & Craddock, R. C. (2013). Learning and comparing
      functional connectomes across subjects. NeuroImage.
      https://doi.org/10.1016/j.neuroimage.2013.04.007
    .. [2] Jason Laska, Manjari Narayan, 2017. skggm 0.2.7: A scikit-learn
      compatible package for Gaussian and related Graphical Models.
      doi:10.5281/zenodo.830033

    """
    conn_matrix = None
    if conn_model in ('corr', 'cor', 'correlation'):
        # credit: nilearn
        from nilearn.connectome import ConnectivityMeasure
        print('\nComputing correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model in ('partcorr', 'parcorr', 'partialcorrelation'):
        # credit: nilearn
        from nilearn.connectome import ConnectivityMeasure
        print('\nComputing partial correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model in ('cov', 'covariance', 'covar', 'sps', 'sparse', 'precision'):
        # Fit estimator to matrix to get sparse matrix
        estimator, used_shrinkage = _fit_sparse_covariance(time_series)
        if estimator is None:
            raise RuntimeError('\nERROR: Covariance estimation failed.')

        if conn_model in ('sps', 'sparse', 'precision'):
            if used_shrinkage:
                print('\nFetching shrunk precision matrix from covariance estimator...\n')
            else:
                print('\nFetching precision matrix from covariance estimator...\n')
            conn_matrix = -estimator.precision_
        else:
            if not used_shrinkage:
                print('\nFetching covariance matrix from covariance estimator...\n')
            conn_matrix = estimator.covariance_
    elif conn_model == 'QuicGraphicalLasso':
        try:
            from inverse_covariance import QuicGraphicalLasso
        except ImportError as err:
            # Fail here with a clear message instead of continuing into an
            # unbound-name error a few lines further down.
            raise ImportError('Cannot run QuicGraphLasso. Skggm not installed!') from err

        # Compute the sparse inverse covariance via QuicGraphLasso
        # credit: skggm
        model = QuicGraphicalLasso(
            init_method='cov',
            lam=0.5,
            mode='default',
            verbose=1)
        print('\nCalculating QuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphicalLassoCV':
        try:
            from inverse_covariance import QuicGraphicalLassoCV
        except ImportError as err:
            raise ImportError('Cannot run QuicGraphLassoCV. Skggm not installed!') from err

        # Compute the sparse inverse covariance via QuicGraphLassoCV
        # credit: skggm
        model = QuicGraphicalLassoCV(
            init_method='cov',
            verbose=1)
        print('\nCalculating QuicGraphLassoCV precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphicalLassoEBIC':
        try:
            from inverse_covariance import QuicGraphicalLassoEBIC
        except ImportError as err:
            raise ImportError('Cannot run QuicGraphLassoEBIC. Skggm not installed!') from err

        # Compute the sparse inverse covariance via QuicGraphLassoEBIC
        # credit: skggm
        model = QuicGraphicalLassoEBIC(
            init_method='cov',
            verbose=1)
        print('\nCalculating QuicGraphLassoEBIC precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'AdaptiveQuicGraphicalLasso':
        try:
            from inverse_covariance import AdaptiveQuicGraphicalLasso, QuicGraphicalLassoEBIC
        except ImportError as err:
            raise ImportError('Cannot run AdaptiveGraphLasso. Skggm not installed!') from err

        # Compute the sparse inverse covariance via
        # AdaptiveGraphLasso + QuicGraphLassoEBIC + method='binary'
        # credit: skggm
        model = AdaptiveQuicGraphicalLasso(
            estimator=QuicGraphicalLassoEBIC(
                init_method='cov',
            ),
            method='binary',
        )
        print('\nCalculating AdaptiveQuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.estimator_.precision_
    else:
        raise ValueError('\nERROR! No connectivity model specified at runtime. Select a valid estimator using the '
                         '-mod flag.')

    # Enforce symmetry
    conn_matrix = np.maximum(conn_matrix, conn_matrix.T)

    # Explicit dimension check; comparing shape tuples lexicographically
    # would let shapes such as (3, 1) through.
    if conn_matrix.ndim < 2 or min(conn_matrix.shape) < 2:
        raise RuntimeError('\nERROR! Matrix estimation selection yielded an empty or 1-dimensional graph. '
                           'Check time-series for errors or try using a different atlas')

    coords = np.array(coords)
    labels = np.array(labels)

    return (conn_matrix, conn_model, dir_path, node_size, smooth, dens_thresh, network, ID, roi, min_span_tree,
            disp_filt, parc, prune, atlas, uatlas, labels, coords, norm, binary, hpass, extract_strategy)
# Read the stock codes from column 1 of the sheet (rows 1-32) and make sure
# they are strings.
stock_list = table.col_values(colx=1, start_rowx=1, end_rowx=33)
stock_list = [str(i) for i in stock_list]
batch_K_data = get_kdata(stock_list, start='2013-09-01', end='2018-09-01')  # look at the most recent five years of data
# print(batch_K_data.info())
# Two preprocessing variants are run on the same raw data.
stock_dataset, selected_stocks = preprocess_data(batch_K_data, min_K_num=1100)
stock_dataset2, selected_stocks2 = preprocess_data2(batch_K_data, min_K_num=1100)
print("The selected stocks is: ", selected_stocks)  # this is the list of stocks actually used
# stock_dataset,selected_stocks=preprocess_data2(batch_K_data,min_K_num=1100)
# Learn the graph structure from the correlations.
edge_model1 = GraphicalLassoCV(cv=3)
edge_model2 = GraphicalLassoCV(cv=3)
# edge_model.fit(stock_dataset)
edge_model1.fit(stock_dataset)
edge_model2.fit(stock_dataset2)
# Cluster with affinity propagation on the covariance estimated by each
# fitted GraphicalLassoCV model.
_, labels1 = affinity_propagation(edge_model1.covariance_)
_, labels2 = affinity_propagation(edge_model2.covariance_)
# Labels start at 0, so the number of clusters is the largest label plus one.
n_labels = max(labels1)
print('Stock Clusters: {}'.format(n_labels + 1))  # original note: 10, i.e. ten clusters were obtained
sz50_df2 = stock_list
# print(sz50_df2)
for i in range(n_labels + 1):
    print('Cluster: {}----> stocks: {}'.format(i, ','.join(np.array(selected_stocks)[labels1 == i])))  # this prints stock codes only, not stock names
    stocks = np.array(selected_stocks)[labels1 == i].tolist()
    # names = sz50_df2.loc[stocks, :].name.tolist()
    # print('Cluster: {}----> stocks: {}'.format(i,','.join(names)))