示例#1
0
def subject_connectivity(timeseries, subject, atlas_name, kind, save=True, save_path=root_folder):
    """
    Estimate the connectivity matrix of one subject and optionally save it.

        timeseries   : timeseries table for subject (timepoints x regions)
        subject      : the subject short ID
        atlas_name   : name of the atlas used
        kind         : the kind of connectivity to be used; one of 'lasso',
                       'tangent', 'partial correlation', 'correlation'
        save         : save the connectivity matrix to a file
        save_path    : base folder; the matrix is written to
                       <save_path>/<subject>/<subject>_<atlas_name>_<kind>.mat
                       (spaces in kind are replaced by underscores)

    returns:
        connectivity : connectivity matrix (regions x regions)

    raises:
        ValueError   : if kind is not one of the supported values
    """

    print("Estimating %s matrix for subject %s" % (kind, subject))

    if kind == 'lasso':
        # Graph Lasso estimator
        covariance_estimator = GraphLassoCV(verbose=1)
        covariance_estimator.fit(timeseries)
        connectivity = covariance_estimator.covariance_
        print('Covariance matrix has shape {0}.'.format(connectivity.shape))

    elif kind in ['tangent', 'partial correlation', 'correlation']:
        conn_measure = connectome.ConnectivityMeasure(kind=kind)
        # fit_transform works on a list of subjects; wrap and unwrap the single one.
        connectivity = conn_measure.fit_transform([timeseries])[0]

    else:
        # Previously an unsupported kind fell through and crashed later with
        # an UnboundLocalError on `connectivity`.
        raise ValueError("Unknown connectivity kind: %r" % (kind,))

    if save:
        subject_file = os.path.join(save_path, subject,
                                    subject + '_' + atlas_name + '_' + kind.replace(' ', '_') + '.mat')
        sio.savemat(subject_file, {'connectivity': connectivity})

    return connectivity
示例#2
0
    def stockCluster(self, centers, data, selectStock):
        '''
        Cluster stocks by affinity propagation on a graph-lasso covariance
        estimate of ``data`` and print the stock names of every cluster.

        data looks like
                     600010  600011
                 0    1.7     2.1
                 1    2.3     3.1

        selectStock looks like 600010 600011 (presumably one code per column
        of data, in the same order -- verify against the caller).

        ``centers`` is not used by this implementation. Stock names are read
        from "stockList.xls", indexed by its 'ts_code' column.
        '''
        estimator = GraphLassoCV()
        estimator.fit(data)

        # labels holds the cluster id assigned to each stock.
        _center_indices, labels = affinity_propagation(estimator.covariance_)
        cluster_count = max(labels) + 1
        print('Stock Clusters: {}'.format(cluster_count))

        name_table = pd.read_excel("stockList.xls").set_index('ts_code')
        codes = np.array(selectStock)
        for cluster_id in range(cluster_count):
            # Print stock names rather than bare codes so the result is readable.
            members = codes[labels == cluster_id].tolist()
            member_names = name_table.loc[members, :].name.tolist()
            print('Cluster: {}----> stocks: {}'.format(cluster_id, ','.join(member_names)))
示例#3
0
def calculate_connectivity_matrix(in_data, extraction_method):
    '''
    Compute connectivity matrices with the requested method and pickle them.

    Meant to run after extract_parcellation_time_series().

    in_data: 2-D array; columns are treated as the variables (the array is
        transposed before np.corrcoef and passed as-is to GraphLassoCV.fit).
    extraction_method: 'correlation' or 'sparse_inverse_covariance'.

    returns (matrix, matrix_file): matrix is a dict of np.arrays keyed by
        measure name ('correlation', or 'covariance' and
        'sparse_inverse_covariance'); matrix_file is the path of 'matrix.pkl'
        in the current working directory, which holds the pickled dict.

    raises ValueError for an unknown extraction_method.
    '''
    import os
    import pickle
    import numpy as np

    if extraction_method == 'correlation':
        matrix = {'correlation': np.corrcoef(in_data.T)}

    elif extraction_method == 'sparse_inverse_covariance':
        # Compute the sparse inverse covariance
        from sklearn.covariance import GraphLassoCV
        estimator = GraphLassoCV()
        estimator.fit(in_data)
        matrix = {
            'covariance': estimator.covariance_,
            'sparse_inverse_covariance': estimator.precision_
        }

    else:
        raise ValueError('Unknown extraction method: %s' % extraction_method)

    matrix_file = os.path.join(os.getcwd(), 'matrix.pkl')
    # pickle writes bytes: text mode ('w') raises TypeError on Python 3.
    with open(matrix_file, 'wb') as f:
        pickle.dump(matrix, f)

    return matrix, matrix_file
示例#4
0
def cluster(stock_dataset, selected_stocks, sz50_df2):
    """Cluster stocks with affinity propagation and export the result as CSV.

    stock_dataset   : data the graph-lasso estimator is fitted on
    selected_stocks : stock codes, indexed by the positions that the
                      clustering returns
    sz50_df2        : DataFrame indexed by stock code with a ``name`` column

    Writes './cluster/center.csv' (code and name of every cluster centre) and
    one './cluster/cluster <i>.csv' per cluster; also prints the clusters.
    """
    # Learn the graph structure from the data.
    glasso = GraphLassoCV()
    glasso.fit(stock_dataset)

    # Affinity-propagation clustering on the estimated covariance.
    center_indices, labels = affinity_propagation(glasso.covariance_)
    # labels holds the cluster id of each stock.
    cluster_count = max(labels) + 1
    print('Stock Clusters: {}'.format(cluster_count))

    # Codes and names of the stocks sitting at the cluster centres.
    center_codes = [selected_stocks[idx] for idx in center_indices]
    center_names = sz50_df2.loc[center_codes, :].name.tolist()
    center_frame = pd.DataFrame(np.column_stack((center_codes, center_names)),
                                columns=['code', 'name'])
    center_frame.to_csv('./cluster/center.csv')

    all_codes = np.array(selected_stocks)
    for cluster_id in range(cluster_count):
        # Print stock names so the clusters are easy to inspect.
        member_codes = all_codes[labels == cluster_id].tolist()
        member_names = sz50_df2.loc[member_codes, :].name.tolist()
        print('Cluster: {}----> stocks: {}'.format(cluster_id, ','.join(member_names)))
        member_frame = pd.DataFrame(np.column_stack((member_codes, member_names)),
                                    columns=['code', 'name'])
        member_frame.to_csv('./cluster/cluster ' + str(cluster_id) + '.csv')
示例#5
0
文件: utils.py 项目: fliem/LeiCA_LIFE
def calculate_connectivity_matrix(in_data, extraction_method):
    '''
    Compute connectivity matrices with the requested method and pickle them.

    Meant to run after extract_parcellation_time_series().

    in_data: 2-D array; columns are treated as the variables (the array is
        transposed before np.corrcoef and passed as-is to GraphLassoCV.fit).
    extraction_method: 'correlation' or 'sparse_inverse_covariance'.

    returns (matrix, matrix_file): matrix is a dict of np.arrays keyed by
        measure name ('correlation', or 'covariance' and
        'sparse_inverse_covariance'); matrix_file is the path of 'matrix.pkl'
        in the current working directory, which holds the pickled dict.

    raises ValueError for an unknown extraction_method.
    '''
    import os
    import pickle
    import numpy as np

    if extraction_method == 'correlation':
        matrix = {'correlation': np.corrcoef(in_data.T)}

    elif extraction_method == 'sparse_inverse_covariance':
        # Compute the sparse inverse covariance
        from sklearn.covariance import GraphLassoCV
        estimator = GraphLassoCV()
        estimator.fit(in_data)
        matrix = {'covariance': estimator.covariance_,
                  'sparse_inverse_covariance': estimator.precision_}

    else:
        raise ValueError('Unknown extraction method: %s' % extraction_method)

    matrix_file = os.path.join(os.getcwd(), 'matrix.pkl')
    # pickle writes bytes: text mode ('w') raises TypeError on Python 3.
    with open(matrix_file, 'wb') as f:
        pickle.dump(matrix, f)

    return matrix, matrix_file
示例#6
0
def get_conn_matrix(time_series, conn_model, NETWORK, ID, dir_path, thr):
    """Estimate a connectivity matrix and save it as a tab-separated text file.

    Parameters
    ----------
    time_series : array handed to the selected estimator.
    conn_model : one of 'corr', 'corr_fast', 'partcorr', 'cov', 'sps'.
        'sps' returns the negated precision matrix of a GraphLassoCV fit,
        'cov' its covariance matrix.
    NETWORK : str or None; only used in the output file name for 'cov'/'sps'.
    ID : str, subject identifier used in the output file name.
    dir_path : str, directory the matrix file is written to.
    thr : value appended to the output file name via str().

    Returns
    -------
    (conn_matrix, est_path) : the estimated matrix and the path it was saved to.

    Raises
    ------
    ValueError : if conn_model is not one of the supported values.
    RuntimeError : re-raised when the accelerated correlation cannot run.
    SystemExit : when the Graph Lasso fit raises RuntimeError.
    """
    if conn_model == 'corr':
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
        est_path = dir_path + '/' + ID + '_est_corr' + '_' + str(thr) + '.txt'
    elif conn_model == 'corr_fast':
        try:
            conn_matrix = compute_correlation(time_series, time_series)
        except RuntimeError:
            print('Cannot run accelerated correlation computation due to a missing dependency. You need brainiak installed!')
            # Nothing was computed; re-raise instead of falling through to
            # np.savetxt with unbound names.
            raise
        est_path = dir_path + '/' + ID + '_est_corr_fast' + '_' + str(thr) + '.txt'
    elif conn_model == 'partcorr':
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
        est_path = dir_path + '/' + ID + '_est_part_corr' + '_' + str(thr) + '.txt'
    elif conn_model == 'cov' or conn_model == 'sps':
        # Fit estimator to matrix to get sparse matrix
        estimator = GraphLassoCV()
        try:
            print("Fitting Lasso estimator...")
            estimator.fit(time_series)
        except RuntimeError:
            # No shrinkage-based retry is implemented, so give up here.
            print('Unstable Lasso estimation. Try again!')
            sys.exit()

        # File-name infix kept exactly as before ('_est_sps_inv' / '_estcov').
        infix = '_est%s' % ('_sps_inv' if conn_model == 'sps' else 'cov')
        if NETWORK is not None:
            est_path = dir_path + '/' + ID + '_' + NETWORK + infix + '_' + str(thr) + '.txt'
        else:
            est_path = dir_path + '/' + ID + infix + '_' + str(thr) + '.txt'

        if conn_model == 'sps':
            conn_matrix = -estimator.precision_
        else:
            conn_matrix = estimator.covariance_
    else:
        raise ValueError('Unknown connectivity model: %r' % (conn_model,))

    np.savetxt(est_path, conn_matrix, delimiter='\t')
    return (conn_matrix, est_path)
示例#7
0
def graph_lasso(X, num_folds):
    """Fit scikit-learn's GraphLassoCV on X using ``num_folds`` as its ``cv``.

    Returns the fitted model's (covariance_, precision_, alpha_) and prints
    the selected alpha.
    """
    print("GraphLasso (sklearn)")
    estimator = GraphLassoCV(cv=num_folds)
    estimator.fit(X)
    chosen_alpha = estimator.alpha_
    print("   lam_: {}".format(chosen_alpha))
    return estimator.covariance_, estimator.precision_, chosen_alpha
 def _cov_selection(self, alphas=4, n_refinements=4, cv=None):
     """Fit GraphLassoCV on ``self.ret`` (treated as already centred).

     ``alphas``, ``n_refinements`` and ``cv`` are forwarded unchanged to the
     estimator. Returns its (covariance_, precision_).
     """
     from sklearn.covariance import GraphLassoCV
     options = dict(alphas=alphas,
                    n_refinements=n_refinements,
                    cv=cv,
                    assume_centered=True)
     estimator = GraphLassoCV(**options)
     estimator.fit(self.ret)
     return estimator.covariance_, estimator.precision_
示例#9
0
def graph_lasso(X, num_folds):
    '''Estimate inverse covariance via scikit-learn GraphLassoCV class.

    X is the data passed to ``fit``; ``num_folds`` is used as the ``cv``
    argument. Returns (covariance_, precision_, alpha_) of the fitted model.
    '''
    # Parenthesised single-argument print works on both Python 2 and 3;
    # the bare print statement is a syntax error on Python 3.
    print('GraphLasso (sklearn)')
    model = GraphLassoCV(cv=num_folds)
    model.fit(X)
    print('   lam_: {}'.format(model.alpha_))
    return model.covariance_, model.precision_, model.alpha_
    def _generate_structure_K(self, X):
        """Return the precision matrix of a GraphLassoCV fit on ``X.T``.

        The estimator is created with ``alphas=20``; the selected alpha is
        printed when the fitted estimator exposes it.
        """
        estimator = GraphLassoCV(alphas=20)
        estimator.fit(X.T)
        precision = estimator.get_precision()

        if hasattr(estimator, 'alpha_'):
            print('alpha=', estimator.alpha_)

        return precision
示例#11
0
def cal_connectome(fmri_ff,
                   confound_ff,
                   atlas_ff,
                   outputjpg_ff,
                   metric='correlation',
                   labelrange=None,
                   label_or_map=0):
    """Compute a connectivity matrix for one fMRI run and save a plot of it.

    fmri_ff      : functional image passed to the masker
    confound_ff  : confounds passed to the masker
    atlas_ff     : atlas image; used as a label image when label_or_map == 0,
                   otherwise as a maps image
    outputjpg_ff : path the figure is saved to
    metric       : 'sparse inverse covariance' (negated GraphLassoCV
                   precision) or any kind accepted by ConnectivityMeasure,
                   e.g. "correlation", "partial correlation", "tangent",
                   "covariance", "precision"
    labelrange   : indices of the regions (time-series columns) to keep;
                   all regions when None
    label_or_map : 0 -> NiftiLabelsMasker, anything else -> NiftiMapsMasker

    Returns the connectivity matrix. Its diagonal is set to 0 in place before
    plotting, so the returned matrix has a zero diagonal as well. If the
    Graph Lasso estimation fails, a warning is issued and an all-zero matrix
    is used instead.
    """
    import warnings

    if label_or_map == 0:
        masker = NiftiLabelsMasker(labels_img=atlas_ff,
                                   standardize=True,
                                   verbose=0)
    else:
        masker = NiftiMapsMasker(maps_img=atlas_ff,
                                 standardize=True,
                                 verbose=0)

    time_series_0 = masker.fit_transform(fmri_ff, confounds=confound_ff)
    if labelrange is None:
        labelrange = np.arange(time_series_0.shape[1])
    time_series = time_series_0[:, labelrange]
    if metric == 'sparse inverse covariance':
        try:
            estimator = GraphLassoCV()
            estimator.fit(time_series)
            correlation_matrix = -estimator.precision_
        except Exception as err:
            # Best-effort fallback kept from the original, but no longer
            # silent, and KeyboardInterrupt/SystemExit are not swallowed.
            warnings.warn('Graph Lasso estimation failed (%s); '
                          'using an all-zero matrix' % (err,))
            correlation_matrix = np.zeros(
                (time_series.shape[1], time_series.shape[1]))
    else:
        correlation_measure = ConnectivityMeasure(kind=metric)
        correlation_matrix = correlation_measure.fit_transform(
            [time_series])[0]

    # Plot the correlation matrix
    fig = plt.figure(figsize=(6, 5), dpi=100)
    plt.clf()
    # Mask the main diagonal for visualization:
    np.fill_diagonal(correlation_matrix, 0)

    plt.imshow(correlation_matrix,
               interpolation="nearest",
               cmap="RdBu_r",
               vmax=0.8,
               vmin=-0.8)
    plt.gca().yaxis.tick_right()
    plt.axis('off')
    plt.colorbar()
    plt.title(metric.title(), fontsize=12)
    plt.tight_layout()
    fig.savefig(outputjpg_ff, bbox_inches='tight')
    plt.close()
    return correlation_matrix
示例#12
0
 def __init__(self, original_matrix):
     '''
     Store the data as a matrix and fit a GraphLassoCV model on it.

     :param original_matrix: instances list (matrix); it is read as
         X = [x(1), x(2), ..., x(len)] with dim features per instance
     '''
     data = np.matrix(original_matrix)
     n_instances, n_features = data.shape
     self._X = data
     self._len, self._dim = n_instances, n_features

     estimator = GraphLassoCV()
     estimator.fit(data)
     self._glasso_covariance = estimator.covariance_
     self._glasso_precision = estimator.precision_
def run_clustering(methods, cases):
    """Spectral-cluster the methods and print the adjusted Rand score.

    methods : sequence of items where item[0] is printed as a description
              and item[1] is used as the reference group of the method
    cases   : data the GraphLassoCV estimator is fitted on; its estimated
              covariance is used as the precomputed affinity matrix

    Prints one "<group>:<cluster label>\\t<description>" line per method,
    followed by the adjusted Rand score between the cluster labels and the
    reference groups. The number of clusters is fixed to 3.
    """
    true_method_groups = [m[1] for m in methods]
    edge_model = GraphLassoCV(alphas=4, n_refinements=5, n_jobs=3, max_iter=100)
    edge_model.fit(cases)
    CV = edge_model.covariance_

    num_clusters = 3
    spectral = SpectralClustering(n_clusters=num_clusters, affinity='precomputed')
    spectral.fit(np.asarray(CV))

    # Parenthesised print works on Python 2 and 3; the bare print statement
    # is a syntax error on Python 3.
    for i, m in enumerate(methods):
        print("%s:%d\t%s" % (m[1], spectral.labels_[i], m[0]))
    print("Adj. Rand Score: %f" % adjusted_rand_score(spectral.labels_, true_method_groups))
示例#14
0
def corrcov(arr, typedat):
    """Return the dependency matrix of ``arr`` selected by ``typedat``.

    arr     : 2-D array; for 'Covariance' and 'Correlation' it is transposed
              first, so its columns are the variables
    typedat : one of 'Partial correlation', 'GraphLassoCV covariance',
              'GraphLassoCV precision', 'Covariance', 'Correlation'

    Raises ValueError for any other ``typedat`` (previously this surfaced as
    an UnboundLocalError on the return statement).
    """
    if typedat == 'Partial correlation':
        out = partialcorr(arr)
    elif typedat == 'GraphLassoCV covariance':
        estimator = GraphLassoCV()
        estimator.fit(arr)
        out = estimator.covariance_
    elif typedat == 'GraphLassoCV precision':
        estimator = GraphLassoCV()
        estimator.fit(arr)
        out = estimator.precision_
    elif typedat == 'Covariance':
        out = np.cov(arr.transpose())
    elif typedat == 'Correlation':
        out = np.corrcoef(arr.transpose())
    else:
        raise ValueError('Unknown typedat: %r' % (typedat,))
    return out
示例#15
0
class TestStatisticalPower(object):
    """Integration smoke test of StatisticalPower for several estimators."""

    @pytest.mark.parametrize("params_in", [
        dict(model_selection_estimator=QuicGraphLassoCV(),
             n_trials=20,
             n_features=25),
        dict(model_selection_estimator=QuicGraphLassoEBIC(),
             n_trials=20,
             n_features=10,
             n_jobs=2),
        dict(model_selection_estimator=GraphLassoCV(),
             n_trials=20,
             n_features=20,
             penalty_='alpha_'),
    ])
    def test_integration_statistical_power(self, params_in):
        '''
        Only checks the shapes and sizes of the fitted attributes, not
        whether the results are valid.
        '''
        X = datasets.load_diabetes().data
        power = StatisticalPower(**params_in)
        power.fit(X)

        expected_k = 5
        grid_size = power.n_grid_points

        assert np.sum(power.results_.flat) > 0
        assert power.results_.shape == (expected_k, grid_size)
        assert len(power.ks_) == expected_k
        assert len(power.grid_) == grid_size
示例#16
0
    def _generate_structure_K(self, X):
        """Return the precision matrix of a GraphLassoCV fit on ``X.T``.

        Prints the selected alpha when the estimator exposes it, and prints a
        notice when the support of the precision matrix (entries with
        magnitude above 1e-10) is exactly the identity pattern.
        """
        estimator = GraphLassoCV(alphas=20)
        estimator.fit(X.T)
        precision = estimator.get_precision()

        if hasattr(estimator, 'alpha_'):
            print('alpha=', estimator.alpha_)

        support = np.abs(precision) > 1e-10
        identity_pattern = np.eye(support.shape[0], dtype=bool)
        if (support == identity_pattern).all():
            print('Got identity structure')

        return precision
    def test_estimate_covariance(self):
        """Check the diagonal of estimate_covariance against a GraphLassoCV fit.

        The reference model is fitted on the last
        ``n_columns * DEFAULT_NUM_REALISATIONS_MULTIPLICATION_FACTOR`` rows of
        the close-price returns.
        """
        close_feature = {
            'name': 'close',
            'normalization': 'standard',
            'transformation': {'name': 'log-return'},
            'is_target': True,
            'local': False,
        }
        configuration = {
            'feature_config_list': [close_feature],
            'fill_limit': 0,
            'exchange_name': 'NYSE',
            'features_ndays': 9,
            'features_resample_minutes': 15,
            'features_start_market_minute': 60,
            'prediction_frequency_ndays': 1,
            'prediction_market_minute': 60,
            'target_delta_ndays': 1,
            'target_market_minute': 60,
            'n_classification_bins': 12,
            'nassets': 3,
            'classify_per_series': False,
            'normalise_per_series': False
        }

        transformation = FinancialDataTransformation(configuration)
        _universe, data = self._prepare_data_for_test()

        calendar = transformation.exchange_calendar
        ndays = transformation.features_ndays  # FIXME this is the only value that works now.
        forecast_interval = transformation.target_delta_ndays
        market_minute = transformation.target_market_minute
        estimated = estimate_covariance(data, ndays, market_minute, "Ledoit", calendar,
                                        forecast_interval)

        returns = returns_minutes_after_market_open_data_frame(data['close'], calendar, market_minute)
        print(returns.shape)
        n_rows = returns.shape[1] * DEFAULT_NUM_REALISATIONS_MULTIPLICATION_FACTOR

        reference_model = GraphLassoCV()
        reference_model.fit(returns.values[-n_rows:, :])
        reference = reference_model.covariance_
        self.assertTrue(np.allclose(estimated.diagonal(), reference.diagonal()))
 def __init__(self, n_components=2, n_iter=5, alpha=None):
     """Set up the mixture with one GraphLassoCV estimator per component.

     n_components : number of components
     n_iter       : stored as-is on the instance
     alpha        : per-component values; when None, a list holding the
                    value 10 once per component is used
     """
     self.n_components = n_components
     self.n_iter = n_iter
     self.min_covar = 1e-3
     # Identity test: `alpha == None` compares element-wise for arrays and
     # then fails when used as a condition.
     if alpha is None:
         self.alpha = [10 for _ in range(self.n_components)]
     else:
         self.alpha = alpha
     self.model = [GraphLassoCV() for _ in range(self.n_components)]
示例#19
0
def group_connectivity(timeseries,
                       subject_list,
                       atlas_name,
                       kind,
                       save=True,
                       save_path=root_folder):
    """
    Estimate the connectivity matrices of a group of subjects and optionally
    save one file per subject.

        timeseries   : list of timeseries tables for subjects (timepoints x regions)
        subject_list : the subject short IDs list
        atlas_name   : name of the atlas used
        kind         : the kind of connectivity to be used; one of 'lasso',
                       'tangent', 'partial correlation', 'correlation'
        save         : save the connectivity matrices to files
        save_path    : base folder; each matrix is written to
                       <save_path>/<subject>/<subject>_<atlas_name>_<kind>.mat
                       (spaces in kind are replaced by underscores)

    returns:
        connectivity_matrices : one connectivity matrix per subject (a list
                                for 'lasso', otherwise the result of
                                ConnectivityMeasure.fit_transform)

    raises:
        ValueError   : if kind is not one of the supported values
    """

    if kind == 'lasso':
        # Graph Lasso estimator, refitted for every subject
        covariance_estimator = GraphLassoCV(verbose=1)
        connectivity_matrices = []

        for ts in timeseries:
            covariance_estimator.fit(ts)
            connectivity = covariance_estimator.covariance_
            connectivity_matrices.append(connectivity)
            print('Covariance matrix has shape {0}.'.format(
                connectivity.shape))

    elif kind in ['tangent', 'partial correlation', 'correlation']:
        conn_measure = connectome.ConnectivityMeasure(kind=kind)
        connectivity_matrices = conn_measure.fit_transform(timeseries)

    else:
        # Previously an unsupported kind crashed later with an
        # UnboundLocalError on `connectivity_matrices`.
        raise ValueError("Unknown connectivity kind: %r" % (kind,))

    if save:
        for i, subject in enumerate(subject_list):
            subject_file = os.path.join(
                save_path, subject, subject + '_' +
                atlas_name + '_' + kind.replace(' ', '_') + '.mat')
            sio.savemat(subject_file,
                        {'connectivity': connectivity_matrices[i]})
            print("Saving connectivity matrix to %s" % subject_file)

    return connectivity_matrices
def main():
    """Fit GraphLassoCV on a Gram matrix of filtered, log-scaled expression data.

    Rows of the loaded expression matrix whose minimum is not above 100 are
    dropped, six columns are selected and reordered, the values are
    transformed with log10(x + 1), and rows 1..99 are kept. The product of
    that matrix with its transpose is what the estimator is fitted on.
    """
    _sample, _genes, raw_expression, _cov = load_data()
    expression = raw_expression[raw_expression.min(1) > 100]

    # reorder and filter data
    expression = expression[:, (3, 4, 0, 2, 5, 7)]

    # log data
    expression = numpy.log10(expression + 1)[1:100, ]
    cov = expression.dot(expression.T)
    # Parenthesised print works on Python 2 and 3.
    print(cov.shape)
    mo = GraphLassoCV(mode='lars', verbose=True, cv=KFold(3, 2), n_jobs=24)
    # fit() returns the estimator itself, so read the estimated matrix from
    # it instead of passing the estimator object to numpy.nonzero.
    mo.fit(cov)
    sparse_cov = mo.covariance_
    # NOTE(review): this prints the SUM of the row indices of the non-zero
    # entries, as the original expression did -- confirm whether a count of
    # non-zero entries was intended.
    print(numpy.nonzero(sparse_cov)[0].sum())
    return
def test_graph_lasso_cv(random_state=1):
    """Smoke-test GraphLassoCV in verbose mode and with an explicit alpha list."""
    # Draw a small sample from a sparse multivariate normal.
    n_features, n_obs = 5, 6
    rng = check_random_state(random_state)
    precision = make_sparse_spd_matrix(n_features, alpha=.96, random_state=rng)
    covariance = linalg.inv(precision)
    X = rng.multivariate_normal(np.zeros(n_features), covariance, size=n_obs)

    # Swallow stdout while exercising the verbose code path.
    saved_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        # verbose has to be very high so that Parallel prints on stdout
        GraphLassoCV(verbose=100, alphas=5, tol=1e-1).fit(X)
    finally:
        sys.stdout = saved_stdout

    # Second smoke test: alphas given explicitly.
    GraphLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1).fit(X)
示例#22
0
def main():
    """Fit GraphLassoCV on a Gram matrix of filtered, log-scaled expression data.

    Rows of the loaded expression matrix whose minimum is not above 100 are
    dropped, six columns are selected and reordered, the values are
    transformed with log10(x + 1), and rows 1..99 are kept. The product of
    that matrix with its transpose is what the estimator is fitted on.
    """
    _sample, _genes, raw_expression, _cov = load_data()
    expression = raw_expression[raw_expression.min(1) > 100]

    # reorder and filter data
    expression = expression[:, (3, 4, 0, 2, 5, 7)]

    # log data
    expression = numpy.log10(expression + 1)[1:100, ]
    cov = expression.dot(expression.T)
    # Parenthesised print works on Python 2 and 3.
    print(cov.shape)
    mo = GraphLassoCV(mode='lars', verbose=True, cv=KFold(3, 2), n_jobs=24)
    # fit() returns the estimator itself, so read the estimated matrix from
    # it instead of passing the estimator object to numpy.nonzero.
    mo.fit(cov)
    sparse_cov = mo.covariance_
    # NOTE(review): this prints the SUM of the row indices of the non-zero
    # entries, as the original expression did -- confirm whether a count of
    # non-zero entries was intended.
    print(numpy.nonzero(sparse_cov)[0].sum())
    return
def get_BP4D_prescion_matrix(label_file_dir):
    """Estimate AU covariance and precision matrices from BP4D label files.

    label_file_dir : directory with one comma-separated label file per video.
        The first line of a file is a header whose columns after the first
        name the Action Units; every following line holds a frame id in the
        first column and integer labels in the other columns.

    Every frame becomes one binary vector of length len(config.AU_SQUEEZE)
    in which the AUs of config.AU_ROI that are labelled 1 are set; all frame
    vectors are stacked and GraphLassoCV is fitted on them.

    Returns {"prec": precision matrix, "cov": covariance matrix}.
    """
    adaptive_AU_database("BP4D")
    model = GraphLassoCV(alphas=100,
                         cv=10,
                         max_iter=10,
                         tol=1e-5,
                         verbose=True,
                         mode="lars",
                         assume_centered=False,
                         n_jobs=100)

    X = []
    for file_name in os.listdir(label_file_dir):  # each file is a video
        AU_column_idx = {}
        with open(label_file_dir + "/" + file_name, "r") as au_file_obj:
            for idx, line in enumerate(au_file_obj):
                # Strip the line ending first: otherwise the last header
                # column keeps its "\n" and never matches an AU name.
                fields = line.strip().split(",")

                if idx == 0:  # header row: map each AU name to its column
                    for col_idx, AU in enumerate(fields[1:]):
                        AU_column_idx[AU] = col_idx + 1
                    continue

                au_labels = [AU for AU in config.AU_ROI.keys()
                             if int(fields[AU_column_idx[AU]]) == 1]
                AU_bin = np.zeros(len(config.AU_SQUEEZE))
                for AU in au_labels:
                    bin_idx = config.AU_SQUEEZE.inv[AU]
                    np.put(AU_bin, bin_idx, 1)
                X.append(AU_bin)
    X = np.array(X)
    print(X.shape)
    model.fit(X)
    cov_ = model.covariance_
    prec_ = model.precision_

    return {"prec": prec_, "cov": cov_}
示例#24
0
def _parallelize_4D_func_loading(f, atlas, method):
    """Return the lower-triangle connectivity of one functional image as a row.

    f      : path of the functional image, loaded with nibabel
    atlas  : label image used to extract one time series per region
    method : 'corr' (np.corrcoef across regions) or 'invcorr'
             (GraphLassoCV precision matrix)

    Returns an array of shape (1, n_pairs) holding the strictly lower
    triangle of the connectivity matrix. Raises ValueError for any other
    method; note that this only happens after the time series were extracted.
    """
    image = nib.load(f)
    masker = NiftiLabelsMasker(labels_img=atlas,
                               standardize=True,
                               resampling_target=None)
    signals = masker.fit_transform(image)

    if method == 'invcorr':
        estimator = GraphLassoCV()
        estimator.fit(signals)
        matrix = estimator.precision_
    elif method == 'corr':
        matrix = np.corrcoef(signals.T)
    else:
        raise ValueError('Specify either corr or invcorr')

    lower_triangle = np.tril_indices(matrix.shape[0], k=-1)
    flattened = matrix[lower_triangle].ravel()
    return flattened[np.newaxis, :]
def computePartialCorrelationsCV(coupling_data):
    """Estimate regularised partial correlations with GraphLassoCV.

    coupling_data : 2-D float array whose columns are the variables. It is
        standardised IN PLACE (column means removed, columns divided by their
        standard deviation), so the caller's array is modified.

    Returns (precision, partial_correlations, reg_alpha):
        precision            : precision matrix of the fitted estimator
        partial_correlations : rho_ij = -p_ij / sqrt(p_ii * p_jj), with the
                               lower triangle INCLUDING the diagonal set to 0
        reg_alpha            : the alpha selected by the estimator
    """
    # standardize (in place)
    coupling_data -= coupling_data.mean(axis=0)
    coupling_data /= coupling_data.std(axis=0)

    estimator = GraphLassoCV(alphas=10)
    estimator.fit(coupling_data)
    prec = estimator.get_precision()
    reg_alpha = estimator.alpha_

    # partial correlations: rho_ij = - p_ij / sqrt(p_ii * p_jj)
    prec_diag = np.diag(prec)
    partial_correlations = -prec / np.sqrt(np.outer(prec_diag, prec_diag))

    # Zero the lower half. The size comes from the matrix itself instead of
    # the former hard-coded 400, which only worked for exactly 400 variables.
    n_variables = partial_correlations.shape[0]
    partial_correlations[np.tril_indices(n_variables)] = 0

    return prec, partial_correlations, reg_alpha
示例#26
0
def compute_connectivity_subject(conn, func, masker):
    """ Returns connectivity of one fMRI for a given atlas

    conn   : 'gl' (GraphLassoCV), 'lw' (LedoitWolf), 'oas' (OAS),
             'scov' (ShrunkCovariance), or 'corr' / 'pcorr'
    func   : functional image passed to masker.fit_transform
    masker : object whose fit_transform returns the time series with one
             column per region

    Returns the strictly lower triangles of (covariance_, precision_). For
    'corr' and 'pcorr' these are the correlation matrix and the result of
    partial_corr(ts).

    Raises ValueError for an unknown conn.
    """
    ts = masker.fit_transform(func)

    if conn == 'corr' or conn == 'pcorr':
        # np.corrcoef treats ROWS as variables; regions are the columns of
        # ts (see the index built from ts.shape[1] below), so transpose.
        # NOTE(review): partial_corr is left untouched -- confirm that it
        # expects regions as columns.
        fc = Bunch(covariance_=np.corrcoef(ts.T), precision_=partial_corr(ts))
    else:
        if conn == 'gl':
            fc = GraphLassoCV(max_iter=1000)
        elif conn == 'lw':
            fc = LedoitWolf()
        elif conn == 'oas':
            fc = OAS()
        elif conn == 'scov':
            fc = ShrunkCovariance()
        else:
            raise ValueError('Unknown connectivity measure: %r' % (conn,))
        fc.fit(ts)

    ind = np.tril_indices(ts.shape[1], k=-1)
    return fc.covariance_[ind], fc.precision_[ind]
def get_DISFA_prescion_matrix(label_file_dir):
    """Estimate AU covariance and precision matrices from DISFA label files.

    label_file_dir : directory with one sub-directory per subject, each
        holding one file per Action Unit whose lines are "<frame>,<label>".
        The AU name is cut out of the file name: it starts three characters
        after the last "_" and ends before the last ".".

    A label below 3 counts as 0, anything else as 1. Every frame of every
    subject becomes one vector of length len(config.AU_SQUEEZE);
    GraphLassoCV is fitted on the stacked vectors.

    Returns {"prec": precision matrix, "cov": covariance matrix}.
    """
    adaptive_AU_database("DISFA")
    estimator = GraphLassoCV(alphas=100,
                             cv=10,
                             max_iter=100,
                             tol=1e-5,
                             verbose=True,
                             mode="lars",
                             assume_centered=False,
                             n_jobs=100)
    samples = []
    for subject_name in os.listdir(label_file_dir):
        subject_dir = label_file_dir + os.sep + subject_name
        labels_by_frame = defaultdict(dict)
        for au_file in os.listdir(subject_dir):
            au_path = subject_dir + "/" + au_file
            au_name = au_file[au_file.rindex("_") + 3:au_file.rindex(".")]
            with open(au_path, "r") as handle:
                for row in handle:
                    frame, intensity = row.strip().split(",")
                    # Surprisingly, labels below 3 are discarded, yet this
                    # still gave excellent results.
                    labels_by_frame[int(frame)][au_name] = 1 if int(intensity) >= 3 else 0
        for au_labels in labels_by_frame.values():
            vector = np.zeros(len(config.AU_SQUEEZE))
            for au_name, active in au_labels.items():
                np.put(vector, config.AU_SQUEEZE.inv[au_name], active)
            samples.append(vector)
    samples = np.array(samples)
    print(samples.shape)
    estimator.fit(samples)
    covariance = estimator.covariance_
    precision = estimator.precision_
    return {"prec": precision, "cov": covariance}
def test_graph_lasso_cv(random_state=1):
    """Smoke-test the verbose mode of GraphLassoCV on a tiny sample."""
    # Draw a small sample from a sparse multivariate normal.
    n_features, n_obs = 5, 6
    rng = check_random_state(random_state)
    precision = make_sparse_spd_matrix(n_features, alpha=.96,
                                       random_state=rng)
    covariance = linalg.inv(precision)
    X = rng.multivariate_normal(np.zeros(n_features), covariance, size=n_obs)

    # Swallow stdout while the verbose fit runs, then restore it.
    saved_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        GraphLassoCV(verbose=10, alphas=3).fit(X)
    finally:
        sys.stdout = saved_stdout
def test_deprecated_grid_scores(random_state=1):
    """Check that reading ``grid_scores`` warns and equals ``grid_scores_``."""
    n_features, n_obs = 5, 6
    rng = check_random_state(random_state)
    precision = make_sparse_spd_matrix(n_features, alpha=.96, random_state=rng)
    covariance = linalg.inv(precision)
    X = rng.multivariate_normal(np.zeros(n_features), covariance, size=n_obs)

    estimator = GraphLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1)
    estimator.fit(X)

    expected_warning = ("Attribute grid_scores was deprecated in version "
                        "0.19 and will be removed in 0.21. Use "
                        "``grid_scores_`` instead")

    def read_deprecated_attribute():
        return estimator.grid_scores

    assert_warns_message(DeprecationWarning, expected_warning,
                         read_deprecated_attribute)
    assert_equal(estimator.grid_scores, estimator.grid_scores_)
def mgsparse(matrix, dimred=0, cutoff=1, eigtype='b'):
    ''' Compute the ROC AUC of a multivariate gaussian classifier that uses
    sparse (graph lasso) covariance and precision matrices.

    Input:
            matrix = n-by-m pandas data frame, each row is one bacteria strain,
            each column is one subject; the first 252 columns are taken as CD
            subjects and the remaining ones as control subjects
            dimred = forwarded to ndgaussianfitsparse as its ``dimred`` argument
            cutoff = cutoff to top eigenvalues (not used by the code of this
            function)
            eigtype = pick n random(r), biggest(b) or smallest(s) eigenvalues
            (not used by the code of this function)
    Output:
            auc = area under ROC curve
    '''

    # convert matrix from Pandas data frame to array
    m = matrix.values

    # control and CD subjects (one row per subject after the transpose)
    con = m.T[252:]
    cd = m.T[:252]

    # get mean for each strain
    conmean = vmean(con.T)
    cdmean = vmean(cd.T)

    # sparse covariance and precision matrix for control
    conglasso = GraphLassoCV()
    conglasso.fit(con)
    concov = conglasso.covariance_
    concovinv = conglasso.precision_

    # sparse covariance and precision matrix for CD
    cdglasso = GraphLassoCV()
    cdglasso.fit(cd)
    cdcov = cdglasso.covariance_
    cdcovinv = cdglasso.precision_

    # Score both groups under both models. The former calls referenced names
    # that are not defined in this function (c24g, sc24gcov, r, ...) and
    # ignored the values computed above as well as the dimred parameter.
    listac = ndgaussianfitsparse(con, conmean, concov, concovinv, dimred=dimred)
    listbc = ndgaussianfitsparse(con, cdmean, cdcov, cdcovinv, dimred=dimred)
    listacd = ndgaussianfitsparse(cd, conmean, concov, concovinv, dimred=dimred)
    listbcd = ndgaussianfitsparse(cd, cdmean, cdcov, cdcovinv, dimred=dimred)
    # NOTE(review): 252 and 172 are hard-coded, presumably the group sizes --
    # confirm against ndaucsklearn.
    auc = ndaucsklearn(listac, listbc, listacd, listbcd, 252, 172, tol=2)

    return auc
示例#31
0
class TestAverageError(object):
    """Integration smoke test of AverageError for several estimators."""

    @pytest.mark.parametrize("params_in", [
        dict(model_selection_estimator=QuicGraphLassoCV(),
             n_trials=20,
             n_features=25),
        dict(model_selection_estimator=QuicGraphLassoEBIC(),
             n_trials=20,
             n_features=10,
             n_jobs=2),
        dict(model_selection_estimator=GraphLassoCV(),
             n_trials=20,
             n_features=20,
             penalty_='alpha_'),
    ])
    def test_integration_statistical_power(self, params_in):
        '''
        Only checks the shapes and sizes of the fitted attributes, not
        whether the results are valid.
        '''
        X = datasets.load_diabetes().data
        avg_err = AverageError(**params_in)
        avg_err.fit(X)

        expected_k = 3
        expected_shape = (expected_k, avg_err.n_grid_points)

        assert np.sum(avg_err.error_fro_.flat) > 0
        assert avg_err.error_fro_.shape == expected_shape
        assert np.sum(avg_err.error_supp_.flat) > 0
        assert avg_err.error_supp_.shape == expected_shape
        assert np.sum(avg_err.error_fp_.flat) > 0
        assert avg_err.error_fp_.shape == expected_shape
        assert np.sum(avg_err.error_fn_.flat) > 0
        assert avg_err.error_fn_.shape == expected_shape
        assert len(avg_err.ks_) == expected_k
        assert len(avg_err.grid_) == avg_err.n_grid_points
def compute_connectivity_voxel(roi, voxel, conn):
    """Return the connectivity between one ROI signal and one voxel signal.

    Parameters
    ----------
    roi : 1-D sequence
        Signal of the region of interest.
    voxel : 1-D sequence
        Signal of the voxel; must have the same length as ``roi``.
    conn : str
        Connectivity measure:
        'corr' or 'pcorr' -> Pearson correlation of the two signals,
        'gl' -> ``GraphLassoCV(max_iter=1000)``,
        'lw' -> ``LedoitWolf()``,
        'oas' -> ``OAS()``,
        'scov' -> ``ShrunkCovariance()``.

    Returns
    -------
    float
        The correlation, or the ROI-voxel entry of the estimated covariance.

    Raises
    ------
    ValueError
        If ``conn`` is not one of the supported measures.
    """
    # Observations in rows, the two variables (roi, voxel) in columns.
    ts = np.array([roi, voxel]).T

    if conn in ('corr', 'pcorr'):
        # rowvar=False: columns are the variables. Without it corrcoef would
        # correlate the first two time points instead of the two signals.
        # With only two variables there is nothing to partial out, so 'pcorr'
        # is the plain correlation.
        return np.corrcoef(ts, rowvar=False)[0, 1]

    if conn == 'gl':
        fc = GraphLassoCV(max_iter=1000)
    elif conn == 'lw':
        fc = LedoitWolf()
    elif conn == 'oas':
        fc = OAS()
    elif conn == 'scov':
        fc = ShrunkCovariance()
    else:
        raise ValueError("Unknown connectivity measure: %r" % (conn,))

    fc.fit(ts)
    # Off-diagonal entry = covariance between roi and voxel
    # ([0, 0] would only be the variance of the roi signal).
    return fc.covariance_[0, 1]
示例#33
0
    def set_optimal_shrinkage_amount(self, X, verbose=False):
        """Select the shrinkage with ``GraphLassoCV`` and store it on ``self``.

        Parameters
        ----------
        X: array-like, shape = [n_samples, n_features]
          Training data, where n_samples is the number of samples
          and n_features is the number of features.
        verbose: bool
          Accepted but not used by this method.

        Returns
        -------
        (cv_alphas_, cv_scores): the two attributes of the ``GraphLassoCV``
        fitted on the observations selected by ``RMCDl1.raw_support_``.
        The selected ``alpha_`` is stored in ``self.shrinkage``.

        """
        n_samples, n_features = X.shape

        # Starting shrinkage: trace of the empirical covariance divided by
        # (n_samples * n_features).
        total_variance = np.trace(empirical_covariance(X))
        initial_shrinkage = total_variance / float(n_samples * n_features)

        # use L2 here? (was done during research work, changed for consistency)
        robust_fit = RMCDl1(shrinkage=initial_shrinkage).fit(X)

        # Cross-validate the graph lasso on the raw support only.
        supported_rows = X[robust_fit.raw_support_]
        glasso_cv = GraphLassoCV().fit(supported_rows)

        self.shrinkage = glasso_cv.alpha_
        return glasso_cv.cv_alphas_, glasso_cv.cv_scores
示例#34
0
                              random_state=prng)
# Invert the precision matrix to get the covariance, then rescale both by the
# square roots of the covariance diagonal: cov gets a unit diagonal and prec
# is scaled the opposite way, so it remains the inverse of cov.
cov = linalg.inv(prec)
d = np.sqrt(np.diag(cov))
cov /= d
cov /= d[:, np.newaxis]
prec *= d
prec *= d[:, np.newaxis]
# Draw zero-mean Gaussian samples with covariance `cov`, then standardize each
# column (subtract its mean, divide by its standard deviation).
X = prng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)
X -= X.mean(axis=0)
X /= X.std(axis=0)

##############################################################################
# Estimate the covariance
# X was centered above, so X.T X / n_samples is its empirical covariance.
emp_cov = np.dot(X.T, X) / n_samples

# GraphLassoCV estimate: keep both the fitted covariance and precision.
model = GraphLassoCV()
model.fit(X)
cov_ = model.covariance_
prec_ = model.precision_

# Ledoit-Wolf estimate: the second return value is discarded and the
# precision is obtained by inverting the covariance.
lw_cov_, _ = ledoit_wolf(X)
lw_prec_ = linalg.inv(lw_cov_)

##############################################################################
# Plot the results
pl.figure(figsize=(10, 6))
pl.subplots_adjust(left=0.02, right=0.98)

# plot the covariances
# (label, matrix) pairs for the three estimates and the true covariance.
covs = [('Empirical', emp_cov), ('Ledoit-Wolf', lw_cov_),
        ('GraphLasso', cov_), ('True', cov)]
# Run group-sparse covariance on all subjects
from nilearn.group_sparse_covariance import GroupSparseCovarianceCV
gsc = GroupSparseCovarianceCV(max_iter=50, verbose=1)
gsc.fit(subjects)

# The figure is a grid of n_displayed rows by 4 columns; the group-sparse
# precision of subject n goes in column 2 of row n.
for n in range(n_displayed):
    plt.subplot(n_displayed, 4, 4 * n + 2)
    plot_matrix(gsc.precisions_[..., n])
    if n == 0:
        # Title only on the first row, showing the selected alpha.
        plt.title("group-sparse\n$\\alpha=%.2f$" % gsc.alpha_)


# Fit one graph lasso per subject
from sklearn.covariance import GraphLassoCV
gl = GraphLassoCV(verbose=1)

# The same estimator object is refitted for each displayed subject; its
# precision goes in column 3 of row n.
for n, subject in enumerate(subjects[:n_displayed]):
    gl.fit(subject)

    plt.subplot(n_displayed, 4, 4 * n + 3)
    plot_matrix(gl.precision_)
    if n == 0:
        plt.title("graph lasso")
    # Each row is labelled with the alpha selected for that subject.
    plt.ylabel("$\\alpha=%.2f$" % gl.alpha_)


# Fit one graph lasso for all subjects at once
# (the subjects' arrays are concatenated along the first axis before fitting).
import numpy as np
gl.fit(np.concatenate(subjects))
示例#36
0
    nmm = NiftiMapsMasker(
        mask_img=mask_file, maps_img=icas_path, resampling_target='mask',
        standardize=True, detrend=True)
    nmm.fit()
    nmm.maps_img_.to_filename('dbg_ica_maps.nii.gz')

    FS_netproj = nmm.transform(all_sub_rs_maps)
    np.save('%i_nets_timeseries' % sub_id, FS_netproj)

    # compute network sparse inverse covariance
    from sklearn.covariance import GraphLassoCV
    from nilearn.image import index_img
    from nilearn import plotting

    try:
        gsc_nets = GraphLassoCV(verbose=2, alphas=20)
        gsc_nets.fit(FS_netproj)

        np.save('%i_nets_cov' % sub_id, gsc_nets.covariance_)
        np.save('%i_nets_prec' % sub_id, gsc_nets.precision_)
    except:
        pass

    ###############################################################################
    # dump region poolings
    ###############################################################################
    from nilearn.image import resample_img

    crad = ds.fetch_atlas_craddock_2012()
    # atlas_nii = index_img(crad['scorr_mean'], 19)  # Craddock 200 region atlas
    atlas_nii = index_img(crad['scorr_mean'], 9)  # Craddock 100 region atlas
# Run get_EFA_HCA on the survey and task results and keep the 'reorder_vec'
# entry of each output as the column ordering.
EFA=True
survey_HCA = get_EFA_HCA(all_results['survey'], EFA)
survey_order = survey_HCA['reorder_vec']
task_HCA = get_EFA_HCA(all_results['task'], EFA)
task_order = task_HCA['reorder_vec']


# Reorder the columns of each dataset, then place task columns first and
# survey columns second in one table.
all_data = pd.concat([all_results['task'].data.iloc[:, task_order], 
                      all_results['survey'].data.iloc[:, survey_order]], 
                    axis=1)
out, tuning = qgraph_cor(all_data, glasso=True, gamma=.5)

# recreate with sklearn just to check
data = scale(all_data)
clf = GraphLassoCV()
clf.fit(data)

# Compare the lower triangles (diagonal included) of the two estimates and
# require them to correlate above 0.99.
sklearn_covariance = clf.covariance_[np.tril_indices_from(clf.covariance_)]
qgraph_covariance = out.values[np.tril_indices_from(out)]
method_correlation = np.corrcoef(sklearn_covariance, qgraph_covariance)[0,1]
assert method_correlation > .99

def add_attributes(g):
    """Attach the 'measurement' and 'cluster' vertex attributes to graph ``g``.

    The first ``len(task_order)`` vertices are labelled 'task' and the next
    ``len(survey_order)`` are labelled 'survey'. Cluster labels come from the
    module-level ``task_HCA`` / ``survey_HCA`` results in their reordered
    order; survey labels are offset by the largest task cluster label.
    """
    measurement = ['task'] * len(task_order)
    measurement += ['survey'] * len(survey_order)
    g.vs['measurement'] = measurement

    task_labels = task_HCA['labels'][task_order]
    shifted_survey_labels = survey_HCA['labels'][survey_order] + max(task_labels)
    g.vs['cluster'] = np.append(task_labels, shifted_survey_labels)
    
# Path of a 'graph_results' folder located beside the task results' output
# directory (i.e. inside that directory's parent).
save_loc = path.join(path.dirname(all_results['task'].get_output_dir()), 
                     'graph_results')
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 12 10:16:16 2016

@author: jonyoung
"""

import connectivity_utils as utils
import numpy as np
import scipy.linalg as la
from sklearn.covariance import GraphLassoCV, ledoit_wolf, GraphLasso
from sklearn.preprocessing import scale


# Load the matrix stored in the subject file.
# NOTE(review): presumably rows are nodes and columns are time points, since
# the data are transposed before fitting below; confirm against load_hcp_matrix.
connectivity_data = utils.load_hcp_matrix('/home/jonyoung/IoP_data/Data/HCP_PTN820/node_timeseries/3T_HCP820_MSMAll_d15_ts2/715950.txt')

# Inspect the raw matrix, its shape and its per-row standard deviation.
# Single-argument print(...) prints the same on Python 2 and Python 3.
print(connectivity_data)
print(np.shape(connectivity_data))
print(np.std(connectivity_data, axis=1))

# Keep only the first 250 columns, then standardize each row.
connectivity_data = connectivity_data[:, :250]
X = scale(connectivity_data, axis=1)

# Fit on the transpose so the rows of X become the features of the model.
model = GraphLassoCV(max_iter=1500, assume_centered=True)
model.fit(np.transpose(X))
# Extract the time series with the spheres masker, passing the confounds file
# to fit_transform.
timeseries = spheres_masker.fit_transform(fmri_filename, confounds=confounds_filename)

###############################################################################
# Estimate correlations
# ---------------------
#
# Everything starts with the estimation of the signals' **covariance** matrix.
# Here the number of ROIs exceeds the number of samples,
print("time series has {0} samples".format(timeseries.shape[0]))

###############################################################################
# a situation in which the graphical lasso **sparse inverse covariance**
# estimator captures the covariance **structure** well.
from sklearn.covariance import GraphLassoCV

covariance_estimator = GraphLassoCV(verbose=1)

###############################################################################
# We simply fit the `GraphLassoCV` object on our region signals
covariance_estimator.fit(timeseries)

###############################################################################
# and get the ROI-to-ROI covariance matrix.
matrix = covariance_estimator.covariance_
print("Covariance matrix has shape {0}.".format(matrix.shape))

###############################################################################
# Plot matrix and graph
# ---------------------
#
# We use `matplotlib` plotting functions to visualize our covariance matrix
示例#40
0
    c0.append(temp_A[0])
    c1.append(temp_A[1])
    c2.append(temp_B)

# Stack the three collected series as rows, then transpose so that each
# series becomes one column (one variable) and each row one observation.
data = np.array([c0, c1, c2])

data = data.transpose()

# Single-argument print(...) prints the same on Python 2 and Python 3.
print(data)

# emp_cov = empirical_covariance(data, assume_centered=False)
#
# print emp_cov

# Sparse covariance / precision estimate of the three variables.
model = GraphLassoCV()
model.fit(data)

cov_ = model.covariance_

prec_ = model.precision_

# Plain correlation matrix of the columns (rowvar=False: columns are variables).
corr = np.corrcoef(data, rowvar=False)

print(corr)

# print cov_
# print prec_


threshold = 0.1
示例#41
0
noise_sd = 2
data, data_conv = mk_dcm_dataset(timepoints, z, noise_sd)
# Save data_conv (under the key 'data') together with A, B, C, u, d and design.
numpy.savez(os.path.join(results_dir, 'dcmdata.npz'), data=data_conv, A=A, B=B, C=C, u=u, d=d, design=design)


# In[4]:

# Top panel of a 2x1 grid: the data_conv series.
plt.subplot(211)
plt.plot(data_conv)
# Correlation between the columns of data_conv (transposed so that each
# column is treated as one variable).
cc = numpy.corrcoef(data_conv.T)
# Single-argument print(...) prints the same on Python 2 and Python 3.
print('correlation matrix')
print(cc)
from sklearn.covariance import GraphLassoCV
import matplotlib.colors

# Convert the graph-lasso precision matrix with pcor_from_precision.
glasso = GraphLassoCV()
glasso.fit(data_conv)
from pcor_from_precision import pcor_from_precision
pcor = pcor_from_precision(glasso.precision_)
print('partial r^2 matrix')
print(pcor**2)

# Four panels side by side, all on the same [-1, 1] colour scale:
# A, B, the correlation matrix and the squared pcor matrix.
plt.figure(figsize=(10, 5))
plt.subplot(141)
plt.imshow(A, interpolation='nearest', norm=matplotlib.colors.Normalize(vmin=-1, vmax=1))
plt.subplot(142)
plt.imshow(B, interpolation='nearest', norm=matplotlib.colors.Normalize(vmin=-1, vmax=1))
plt.subplot(143)
plt.imshow(cc, interpolation='nearest', norm=matplotlib.colors.Normalize(vmin=-1, vmax=1))
plt.subplot(144)
plt.imshow(pcor**2, interpolation='nearest', norm=matplotlib.colors.Normalize(vmin=-1, vmax=1))
示例#42
0
def GraphicLasso(X):
    """Return the precision matrix estimated from ``X`` by ``GraphLassoCV``.

    Parameters
    ----------
    X : array-like
        Data passed unchanged to ``GraphLassoCV.fit``.

    Returns
    -------
    The ``precision_`` attribute of the fitted estimator.
    """
    model = GraphLassoCV()
    model.fit(X)
    # Only the precision is needed; the fitted covariance is not used.
    return model.precision_
##############################################################################
# Extract time series
# --------------------
# The masker is built from the atlas maps with standardize=True and uses the
# 'nilearn_cache' directory as its memory cache.
from nilearn.input_data import NiftiMapsMasker
masker = NiftiMapsMasker(maps_img=atlas_filename, standardize=True,
                         memory='nilearn_cache', verbose=5)

# Only the first functional image (data.func[0]) is transformed, with the
# dataset's confounds passed to fit_transform.
time_series = masker.fit_transform(data.func[0],
                                   confounds=data.confounds)

##############################################################################
# Compute the sparse inverse covariance
# --------------------------------------
from sklearn.covariance import GraphLassoCV
estimator = GraphLassoCV()

estimator.fit(time_series)

##############################################################################
# Display the connectome matrix
# ------------------------------
from matplotlib import pyplot as plt

# Display the covariance
plt.figure(figsize=(10, 10))

# The covariance can be found at estimator.covariance_
# The colour scale is fixed to [-1, 1] with the RdBu_r colormap.
plt.imshow(estimator.covariance_, interpolation="nearest",
           vmax=1, vmin=-1, cmap=plt.cm.RdBu_r)
# And display the labels
示例#44
0
文件: inv_cov.py 项目: vsmolyakov/fin
    symbols, names = np.array(list(STOCKS.items())).T
   
    start = datetime(2014, 1, 1, 0, 0, 0, 0, pytz.utc)
    end = datetime(2016, 1, 1, 0, 0, 0, 0, pytz.utc)    

    quotes = [quotes_historical_yahoo(symbol, start, end, asobject=True) for symbol in symbols]

    qopen = np.array([q.open for q in quotes]).astype(np.float)
    qclose = np.array([q.close for q in quotes]).astype(np.float)                
            
    variation= qclose - qopen  #per day variation in price for each symbol
    X = variation.T
    X /= X.std(axis=0) #standardize to use correlations rather than covariance
                
    #estimate inverse covariance    
    graph = GraphLassoCV()
    graph.fit(X)
    
    gl_cov = graph.covariance_
    gl_prec = graph.precision_
    gl_alphas =graph.cv_alphas_
    gl_scores = np.mean(graph.grid_scores, axis=1)

    plt.figure()        
    sns.heatmap(gl_prec)
    
    plt.figure()    
    plt.plot(gl_alphas, gl_scores, marker='o', color='b', lw=2.0, label='GraphLassoCV')
    plt.title("Graph Lasso Alpha Selection")
    plt.xlabel("alpha")
    plt.ylabel("score")
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 12 10:16:16 2016

@author: jonyoung
"""

import connectivity_utils as utils
import numpy as np
import scipy.linalg as la
from sklearn.covariance import GraphLassoCV, ledoit_wolf, GraphLasso
from sklearn.preprocessing import scale

# Load the matrix stored in the subject file.
# NOTE(review): presumably rows are nodes and columns are time points, since
# the data are transposed before fitting below; confirm against load_hcp_matrix.
connectivity_data = utils.load_hcp_matrix(
    '/home/jonyoung/IoP_data/Data/HCP_PTN820/node_timeseries/3T_HCP820_MSMAll_d15_ts2/715950.txt'
)

# Inspect the raw matrix, its shape and its per-row standard deviation.
# Single-argument print(...) prints the same on Python 2 and Python 3.
print(connectivity_data)
print(np.shape(connectivity_data))
print(np.std(connectivity_data, axis=1))

# Keep only the first 250 columns, then standardize each row.
connectivity_data = connectivity_data[:, :250]
X = scale(connectivity_data, axis=1)

# Fit on the transpose so the rows of X become the features of the model.
model = GraphLassoCV(max_iter=1500, assume_centered=True)
model.fit(np.transpose(X))
示例#46
0
############################################################################
# Graphical LASSO on Yeo's clusters extracted from sepideh's data 
############################################################################
import numpy as np
import pylab as pl
from sklearn.covariance import GraphLassoCV, OAS
# Fit a cross-validated graph lasso on tc_roi and keep both the estimated
# covariance and precision matrices.
tc = tc_roi
# alphas=3 is an integer, i.e. the number of points on the alpha grid (not a
# list of alpha values); the grid is refined 3 times and 2 jobs run in parallel.
glasso = GraphLassoCV(verbose=1, n_refinements=3, alphas=3, n_jobs=2)
glasso.fit(tc)
cov_ = glasso.covariance_
prec_ = glasso.precision_