def cross_val_score_GLasso(data, fold=5, alpha=0.01):
    """Score GraphicalLasso at a fixed alpha by k-fold cross-validation,
    averaging held-out log-likelihood, AIC, and sparsity over the folds."""
    n = data.shape[0]
    m = int(n / fold)
    score = {}
    score['log_lik'] = 0
    score['AIC'] = 0
    score['non_zero'] = 0

    for i in range(1, fold + 1):
        test_index = np.arange((i - 1) * m, i * m)
        #print(test_index)
        train_index = np.delete(np.arange(0, n), test_index)
        test_data = data[test_index, :]
        train_data = data[train_index, :]
        cov = sample_cov(test_data)
        model = GraphicalLasso(alpha=alpha)
        model.fit(train_data)
        prec = model.precision_

        score['log_lik'] += log_likelihood(cov, prec) / fold
        score['AIC'] += AIC(cov, prec, n - m) / fold
        score['non_zero'] += L0_penal(prec) / fold

    return score
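The helpers sample_cov, log_likelihood, AIC, and L0_penal are defined elsewhere in this project. A minimal sketch of what they might look like under a Gaussian model (signatures and constants are assumptions, not the original definitions):

import numpy as np

def sample_cov(X):
    # Empirical covariance with rows as samples.
    return np.cov(X, rowvar=False)

def log_likelihood(cov, prec):
    # Average Gaussian log-likelihood, up to an additive constant:
    # 0.5 * (log det(P) - tr(S P)).
    _, logdet = np.linalg.slogdet(prec)
    return 0.5 * (logdet - np.trace(cov @ prec))

def L0_penal(prec, eps=1e-8):
    # Number of non-zero off-diagonal entries in the precision matrix.
    return int((np.abs(prec) > eps).sum()) - prec.shape[0]

def AIC(cov, prec, n):
    # AIC = -2 * n * avg. log-likelihood + 2 * (free parameters).
    return -2.0 * n * log_likelihood(cov, prec) + 2.0 * L0_penal(prec)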
Example #2
def glasso_results(data_grid, K, K_obs, ells, alpha):
    gl = GLsk(alpha=alpha, mode='cd', assume_centered=False, max_iter=500)

    tic = time.time()
    iters = []
    precisions = []
    for d in data_grid.transpose(2, 0, 1):
        gl.fit(d)
        iters.append(gl.n_iter_)
        precisions.append(gl.precision_)
    tac = time.time()
    iterations = np.max(iters)
    precisions = np.array(precisions)

    ss = utils.structure_error(K, precisions)  #, thresholding=1, eps=1e-5)

    MSE_observed = None
    MSE_precision = utils.error_norm(K, precisions, upper_triangular=True)
    MSE_latent = None
    mean_rank_error = None

    res = dict(n_dim_obs=K.shape[1],
               time=tac - tic,
               iterations=iterations,
               MSE_precision=MSE_precision,
               MSE_observed=MSE_observed,
               MSE_latent=MSE_latent,
               mean_rank_error=mean_rank_error,
               likelihood=likelihood_score(data_grid.transpose(2, 0, 1),
                                           precisions),
               note=None,
               estimator=gl)

    res = dict(res, **ss)
    return res
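likelihood_score is imported from elsewhere in this project. A plausible sketch, assuming it averages the Gaussian log-likelihood of each dataset under its estimated precision (assumed semantics, not the original code):

import numpy as np

def likelihood_score(data_list, precisions):
    # Mean Gaussian log-likelihood across datasets, up to a constant.
    scores = []
    for X, P in zip(data_list, precisions):
        S = np.cov(X, rowvar=False)
        _, logdet = np.linalg.slogdet(P)
        scores.append(0.5 * (logdet - np.trace(S @ P)))
    return float(np.mean(scores))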
Example #3
    def test_gowl_vs_glasso_duality_gap_3(self):
        """
        Duality Gap goes negative in this case. Should that happen?
        """
        np.random.seed(680)
        p = 10
        blocks = [
            Block(dim=p,
                  idx=0,
                  block_min_size=2,
                  block_max_size=6,
                  block_value=0.9),
            Block(dim=p,
                  idx=1,
                  block_min_size=2,
                  block_max_size=6,
                  block_value=-0.9),
            Block(dim=p,
                  idx=3,
                  block_min_size=2,
                  block_max_size=6,
                  block_value=-0.5),
        ]
        theta_star, blocks, theta_blocks = generate_theta_star_gowl(p=p,
                                                                    alpha=0.5,
                                                                    noise=0.1,
                                                                    blocks=blocks)
        lam1 = 0.001  # controls sparsity
        lam2 = 0.01  # encourages equality of coefficients
        rho = oscar_weights(lam1, lam2, (p ** 2 - p) / 2)

        theta_star = theta_star[0]
        sigma = np.linalg.inv(theta_star)
        n = 100
        X = np.random.multivariate_normal(np.zeros(p), sigma, n)
        X = standardize(X)
        S = np.cov(X.T)

        theta_0 = np.linalg.inv(S)
        model = GOWLModel(X, S, theta_0, lam1, lam2, 'backtracking', max_iters=100000)
        model.fit()
        theta_gowl = model.theta_hat

        gl = GraphicalLasso(max_iter=200)
        # GraphicalLasso.fit expects an (n_samples, n_features) data matrix,
        # so fit on X rather than on the covariance S.
        gl.fit(X)
        theta_glasso = gl.get_precision()

        print('Non-zero entries in precision matrix: {}'.format(np.count_nonzero(theta_gowl)))
        plot_multiple_theta_matrices_2d([theta_blocks, theta_star, theta_glasso, theta_gowl],
                                        [f"Blocks: {len(blocks)}", 'True Theta', 'GLASSO', 'GOWL'])

        _fit_evaluations(theta_star, theta_glasso, 3, 'GLASSO')
        _fit_evaluations(theta_star, theta_gowl, 3, 'GOWL')

        y_hat_gowl = spectral_clustering(theta=theta_gowl, K=4)
        y_hat_glasso = spectral_clustering(theta=theta_glasso, K=4)
        y_true = spectral_clustering(theta=theta_blocks, K=4).flatten()
        _cluster_evaluations(y_true, y_hat_gowl, 'GOWL')
        _cluster_evaluations(y_true, y_hat_glasso, 'GLASSO')
Example #4
def get_optimal_cov_estimator(time_series):
    from sklearn.covariance import GraphicalLassoCV

    estimator = GraphicalLassoCV(cv=5, assume_centered=True)
    print("\nSearching for best Lasso estimator...\n")
    try:
        estimator.fit(time_series)
        return estimator
    except BaseException:
        ix = 0
        print("\nModel did not converge on first attempt. "
              "Varying tolerance...\n")
        while not hasattr(estimator, 'covariance_') and \
            not hasattr(estimator, 'precision_') and ix < 3:
            for tol in [0.1, 0.01, 0.001, 0.0001]:
                print(f"Tolerance={tol}")
                estimator = GraphicalLassoCV(cv=5,
                                             max_iter=200,
                                             tol=tol,
                                             assume_centered=True)
                try:
                    estimator.fit(time_series)
                    return estimator
                except BaseException:
                    ix += 1
                    continue

    if not hasattr(estimator, 'covariance_') and not hasattr(
            estimator, 'precision_'):
        print("Unstable Lasso estimation. Applying shrinkage to empirical "
              "covariance...")
        from sklearn.covariance import (
            GraphicalLasso,
            empirical_covariance,
            shrunk_covariance,
        )
        try:
            emp_cov = empirical_covariance(time_series, assume_centered=True)
            for i in np.arange(0.8, 0.99, 0.01):
                print(f"Shrinkage={i}:")
                shrunk_cov = shrunk_covariance(emp_cov, shrinkage=i)
                alphaRange = 10.0**np.arange(-8, 0)
                for alpha in alphaRange:
                    print(f"Auto-tuning alpha={alpha}...")
                    estimator_shrunk = GraphicalLasso(alpha,
                                                      assume_centered=True)
                    try:
                        estimator_shrunk.fit(shrunk_cov)
                        return estimator_shrunk
                    except BaseException:
                        continue
        except BaseException:
            return None
    else:
        return estimator
Example #5
    def predict(self,
                data: pd.DataFrame,
                alpha: float = 0.01,
                max_iter: int = 2000,
                **kwargs) -> nx.Graph:
        """Predict the graph structure."""
        edge_model = GraphicalLasso(alpha=alpha, max_iter=max_iter)
        edge_model.fit(data.values)
        return nx.relabel_nodes(nx.DiGraph(edge_model.get_precision()),
                                {idx: i
                                 for idx, i in enumerate(data.columns)})
Example #6
def test_SGL_scikit():
    """
    test single Graphical Lasso solver vs. scikit-learn
    """
    p = 10
    N = 100

    Sigma, Theta = generate_precision_matrix(p=p,
                                             M=2,
                                             style='erdos',
                                             gamma=2.8,
                                             prob=0.1,
                                             scale=False,
                                             nxseed=None)
    S, samples = sample_covariance_matrix(
        Sigma, N)  # sample from multivar_norm(Sigma)

    lambda1 = 0.01

    singleGL = GraphicalLasso(alpha=lambda1,
                              tol=1e-6,
                              max_iter=500,
                              verbose=False)
    model = singleGL.fit(samples.T)  # transpose because of sklearn format

    sol_scikit = model.precision_

    Omega_0 = np.eye(p)

    sol, info = ADMM_SGL(S,
                         lambda1,
                         Omega_0,
                         tol=1e-7,
                         rtol=1e-5,
                         verbose=True,
                         latent=False)

    # run into max_iter
    sol2, info2 = ADMM_SGL(S,
                           lambda1,
                           Omega_0,
                           stopping_criterion='kkt',
                           tol=1e-20,
                           max_iter=200,
                           verbose=True,
                           latent=False)

    assert_array_almost_equal(sol_scikit, sol['Theta'], 3)
    assert_array_almost_equal(sol_scikit, sol2['Theta'], 3)

    return
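The transpose at fit time is the detail to watch here: scikit-learn expects an (n_samples, n_features) matrix, while the sampler evidently returns features along the first axis. A self-contained shape check with synthetic stand-in data:

import numpy as np
from sklearn.covariance import GraphicalLasso

p, N = 10, 100
samples = np.random.default_rng(0).normal(size=(p, N))  # (features, samples)
GraphicalLasso(alpha=0.01).fit(samples.T)               # (samples, features)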
Example #7
    def _fit(self, X):
        self.estimator_     = GraphicalLasso(
            alpha           = self.alpha,
            assume_centered = self.assume_centered,
            enet_tol        = self.enet_tol,
            max_iter        = self.max_iter,
            mode            = self.mode,
            tol             = self.tol
        ).fit(X)

        _, self.labels_     = affinity_propagation(
            self.partial_corrcoef_, **self._apcluster_params
        )

        return self
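self.partial_corrcoef_ is defined elsewhere on this class. The usual construction from the fitted precision matrix, sketched here as an assumption about what that property computes:

import numpy as np

def partial_corrcoef(precision):
    # Partial correlation from a precision matrix Theta:
    # rho_ij = -Theta_ij / sqrt(Theta_ii * Theta_jj), unit diagonal.
    d = np.sqrt(np.diag(precision))
    pcorr = -precision / np.outer(d, d)
    np.fill_diagonal(pcorr, 1.0)
    return pcorr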
Example #8
def wrapper():
    seed = int(snakemake.wildcards["replicate"])
    np.random.seed(seed)
    data = snakemake.input["data"]
    df = pd.read_csv(data)
    X = df.values
    filename = snakemake.output["adjmat"]

    start = time.perf_counter()
    cov = GraphicalLasso(alpha=float(snakemake.wildcards["alpha"]),
                         mode=snakemake.wildcards["mode"],
                         tol=float(snakemake.wildcards["tol"]),
                         enet_tol=float(snakemake.wildcards["enet_tol"]),
                         max_iter=int(snakemake.wildcards["max_iter"]),
                         verbose=bool(snakemake.wildcards["verbose"]),
                         assume_centered=bool(
                             snakemake.wildcards["assume_centered"])).fit(X)
    #adjmat = np.around(np.abs(cov.precision_), decimals=3)
    adjmat = ((np.around(np.abs(cov.precision_), decimals=3) > float(
        snakemake.wildcards["precmat_threshold"])) * 1 -
              np.identity(X.shape[1])).astype(int)

    tottime = time.perf_counter() - start

    time_filename = snakemake.output["time"]
    np.savetxt(time_filename, [tottime])
    dfadj = pd.DataFrame(adjmat)
    dfadj.columns = df.columns

    dfadj.to_csv(filename, index=False)
Example #9
def main():
    mean = torch.tensor(np.ones(16), dtype=torch.float32)
    diag = torch.tensor(np.ones(16), dtype=torch.float32)

    population = Gaussian_Distribution(mean=mean,
                                       diag=diag,
                                       sub=0.3,
                                       type='chain',
                                       slash=1)
    truth = population.invcov.numpy()
    n = 1000
    d = population.dim

    print(truth)
    dist, sample, _, S = population.generate(n, numpy_like=True)
    #print(S)
    #print(np.array(sample))
    print(sample_mean(np.array(sample)))
    print(sample_cov(np.array(sample)))

    R = np.linalg.inv(S)
    #print(R)
    #print(sample)
    np.random.seed(0)
    model = GraphicalLassoCV()
    model.fit(np.array(sample))
    cov_ = model.covariance_
    prec_ = model.precision_

    heatmap(prec_)

    plt.figure(figsize=(4, 3))
    plt.axes([.2, .15, .75, .7])
    plt.plot(model.cv_alphas_, np.mean(model.grid_scores_, axis=1), 'o-')
    plt.axvline(model.alpha_, color='.5')
    plt.title('Model selection')
    plt.ylabel('Cross-validation score')
    plt.xlabel('alpha')

    plt.show()
    print(model.cv_alphas_, model.grid_scores_)

    model = GraphicalLasso()
    model.fit(sample)
    heatmap(model.precision_, 0.055)

    score = dict()
    score['log_lik'] = []
    score['AIC'] = []
    alpha_list = np.hstack((np.arange(0, 0.1,
                                      0.001), np.arange(0.11, 0.3, 0.01)))
    data = np.array(sample)
    for alpha in alpha_list:
        out_dict = cross_val_score_GLasso(data, alpha=alpha)
        score['log_lik'].append(out_dict['log_lik'])
        score['AIC'].append(out_dict['AIC'])
    plt.plot(alpha_list, score['log_lik'], 'o-')
    plt.show()
    plt.plot(alpha_list, score['AIC'])
    plt.show()
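Note that cv_alphas_ and grid_scores_ were removed from GraphicalLassoCV in recent scikit-learn releases in favor of the cv_results_ dict. The equivalent model-selection plot on scikit-learn >= 1.0 (self-contained sketch with synthetic data; key names per the current docs):

import numpy as np
import matplotlib.pyplot as plt
from sklearn.covariance import GraphicalLassoCV

X = np.random.default_rng(0).normal(size=(200, 8))
m = GraphicalLassoCV().fit(X)
plt.plot(m.cv_results_["alphas"], m.cv_results_["mean_test_score"], 'o-')
plt.axvline(m.alpha_, color='.5')
plt.xlabel('alpha')
plt.ylabel('Cross-validation score')
plt.show()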
Example #10
    def fit(self):
        if not self.is_fitted:
            all_x = [
                elem.reshape(-1) for a_list in self.data.values()
                for elem in a_list
            ]
            last_err = None
            for alpha in np.logspace(-1, 5, 10):
                try:
                    self.estimator = GraphicalLasso(assume_centered=False,
                                                    alpha=alpha)
                    self.estimator.fit(all_x)
                    self.is_fitted = True
                    return
                except Exception as e:
                    # Keep a reference: `e` is unbound once the except
                    # block exits in Python 3, so `raise e` would fail.
                    last_err = e
                    logger.error(f"Graphical lasso failed with alpha={alpha}")
            raise last_err
Example #11
    def predict(self, data, alpha=0.01, max_iter=2000, **kwargs):
        """ Predict the graph skeleton.

        Args:
            data (pandas.DataFrame): observational data
            alpha (float): regularization parameter
            max_iter (int): maximum number of iterations

        Returns:
            networkx.Graph: Graph skeleton
        """
        edge_model = GraphicalLasso(alpha=alpha, max_iter=max_iter)
        edge_model.fit(data.values)

        return nx.relabel_nodes(nx.DiGraph(edge_model.get_precision()),
                                {idx: i
                                 for idx, i in enumerate(data.columns)})
Example #12
def test_graphical_lasso(random_state=0):
    # Sample data from a sparse multivariate normal
    dim = 20
    n_samples = 100
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.95, random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
    emp_cov = empirical_covariance(X)

    for alpha in (0., .1, .25):
        covs = dict()
        icovs = dict()
        for method in ('cd', 'lars'):
            cov_, icov_, costs = graphical_lasso(emp_cov,
                                                 return_costs=True,
                                                 alpha=alpha,
                                                 mode=method)
            covs[method] = cov_
            icovs[method] = icov_
            costs, dual_gap = np.array(costs).T
            # Check that the costs always decrease (doesn't hold if alpha == 0)
            if not alpha == 0:
                assert_array_less(np.diff(costs), 0)
        # Check that the 2 approaches give similar results
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=4)
        assert_array_almost_equal(icovs['cd'], icovs['lars'], decimal=4)

    # Smoke test the estimator
    model = GraphicalLasso(alpha=.25).fit(X)
    model.score(X)
    assert_array_almost_equal(model.covariance_, covs['cd'], decimal=4)
    assert_array_almost_equal(model.covariance_, covs['lars'], decimal=4)

    # For a centered matrix, assume_centered could be chosen True or False
    # Check that this returns indeed the same result for centered data
    Z = X - X.mean(0)
    precs = list()
    for assume_centered in (False, True):
        prec_ = GraphicalLasso(
            assume_centered=assume_centered).fit(Z).precision_
        precs.append(prec_)
    assert_array_almost_equal(precs[0], precs[1])
Example #14
    def get_mean_cov(x, y):
        max_label = y.astype(int).max()
        
        ps = []
        ms = []
        
        for i in range(max_label + 1):
        
            model = GraphicalLasso()
            label_i = (y==i).astype(bool)
            x2 = x[label_i]
            
            model.fit(x2)
            ps.append(model.precision_)
            ms.append(model.location_)

        ms = np.stack(ms)
        ps = np.stack(ps)
        
        return ms,ps
Example #15
def glasso(subsamples, alpha, precision_tol=1e-4, glasso_params={}):
    """Run the graphical lasso from scikit learn over the given
    subsamples, at the given regularization level.

    Parameters:
      - subsamples (np.array): the subsample array
      - alpha (float): the regularization parameter at which to run
        the estimator, taken as 1/lambda, i.e., lower values mean
        sparser estimates

    Returns:
      - estimates (np.array): The adjacency matrices of the graphs
        estimated for each subsample
    """
    (N, _, p) = subsamples.shape
    precisions = np.zeros((len(subsamples), p, p))
    g = GraphicalLasso(alpha=1 / alpha, **glasso_params)
    for j, sample in enumerate(subsamples):
        precision = g.fit(sample).precision_
        precisions[j, :, :] = precision - np.diag(np.diag(precision))
    estimates = (abs(precisions) > precision_tol).astype(int)
    return estimates
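A hedged usage sketch with synthetic subsamples (shapes inferred from the unpacking above, not from the original project):

import numpy as np

rng = np.random.default_rng(0)
subsamples = rng.normal(size=(10, 50, 6))   # N=10 subsamples, 50 obs, p=6
estimates = glasso(subsamples, alpha=10.0)  # estimator alpha = 1/10
print(estimates.shape)                      # (10, 6, 6) binary adjacencies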
Example #16
def get_cov_estimator(cov_type):
    if cov_type == 'LW':
        model = LedoitWolf()
    elif cov_type == 'OAS':
        model = OAS()
    elif cov_type == 'MCD':
        model = MinCovDet()
    elif cov_type[:2] == 'SC':
        shrinkage = float(cov_type.split('_')[1])
        model = ShrunkCovariance(shrinkage=shrinkage)
    elif cov_type[:2] == 'GL':
        alpha = float(cov_type.split('_')[1])
        model = GraphicalLasso(alpha=alpha)
    else:
        raise ValueError(f"Unknown cov_type: {cov_type}")
    return model
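The cov_type strings are parsed positionally, so calls presumably look like this (hypothetical type strings following the parsing above, assuming the covariance classes are imported as in the snippet):

model = get_cov_estimator('GL_0.05')  # GraphicalLasso(alpha=0.05)
model = get_cov_estimator('SC_0.2')   # ShrunkCovariance(shrinkage=0.2)
model = get_cov_estimator('LW')       # LedoitWolf()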
Example #17
def get_mean_cov(x, y):
    #print(x.shape)
    ms_list = []
    ps_list = []

    # Samples with label == 1
    ones = (y == 1).astype(bool)
    model = GraphicalLasso()
    x2 = x[ones]
    kmeans = GaussianMixture(n_components=3,
                             init_params='random',
                             covariance_type='full')
    new_label = kmeans.fit_predict(x2)

    for elem in range(3):
        index = np.where(new_label == elem)
        tmp_df = x2[index]
        #print(tmp_df.shape)
        model.fit(tmp_df)
        p1 = model.precision_
        m1 = model.location_
        ms_list.append(m1)
        ps_list.append(p1)

    # Samples with label == 0
    onesb = (y == 0).astype(bool)
    x2b = x[onesb]
    kmeans = GaussianMixture(n_components=3,
                             init_params='random',
                             covariance_type='full')
    new_label = kmeans.fit_predict(x2b)
    model = GraphicalLasso()
    for elem in range(3):
        index = np.where(new_label == elem)
        tmp_df = x2b[index]

        model.fit(tmp_df)
        p1 = model.precision_
        m1 = model.location_
        ms_list.append(m1)
        ps_list.append(p1)

    ms = np.stack(ms_list)
    ps = np.stack(ps_list)
    return ms, ps
Example #18
def get_mean_cov3(x, y):
    #print(x.shape)
    ms_list = []
    ps_list = []

    # Samples with label == 1
    ones = (y == 1).astype(bool)
    model = GraphicalLasso()
    x2 = x[ones]
    kmeans = KMeans(n_clusters=3, random_state=0, algorithm='elkan').fit(x2)
    new_label = kmeans.labels_

    for elem in range(3):
        index = np.where(new_label == elem)
        tmp_df = x2[index]
        #print(tmp_df.shape)
        model.fit(tmp_df)
        p1 = model.precision_
        m1 = model.location_
        ms_list.append(m1)
        ps_list.append(p1)

    # Samples with label == 0
    onesb = (y == 0).astype(bool)
    x2b = x[onesb]
    kmeans = KMeans(n_clusters=3, random_state=0, algorithm='elkan').fit(x2b)
    new_label = kmeans.labels_
    model = GraphicalLasso()
    for elem in range(3):
        index = np.where(new_label == elem)
        tmp_df = x2b[index]

        model.fit(tmp_df)
        p1 = model.precision_
        m1 = model.location_
        ms_list.append(m1)
        ps_list.append(p1)

    ms = np.stack(ms_list)
    ps = np.stack(ps_list)
    return ms, ps
Example #19
def get_mean_cov(x, y):
    model = GraphicalLasso()
    ones = (y == 1).astype(bool)
    x2 = x[ones]
    model.fit(x2)
    p1 = model.precision_
    m1 = model.location_

    onesb = (y == 0).astype(bool)
    x2b = x[onesb]
    model.fit(x2b)
    p2 = model.precision_
    m2 = model.location_

    ms = np.stack([m1, m2])
    ps = np.stack([p1, p2])
    return ms, ps
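These per-class means and precisions are typically fed to a GaussianMixture as initial parameters. A hedged wiring sketch (synthetic data; the means_init/precisions_init hookup is an assumption about downstream use):

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.default_rng(0)
x = rng.normal(size=(200, 5))
y = (rng.random(200) > 0.5).astype(int)

ms, ps = get_mean_cov(x, y)  # (2, 5) means, (2, 5, 5) precisions
gm = GaussianMixture(n_components=2, covariance_type='full',
                     means_init=ms, precisions_init=ps)
gm.fit(x)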
Example #20
def compute_covariance(dataset):
    """
    Estimate covariance and precision matrices from data X.

    Depending on samples number, use either EmpiricalCovariance or
    GraphicalLasso methods from scikit-learn.

    Input:
        dataset: ndarray
            Dataset
    Outputs:
        covariance: ndarray
            Estimated covariance matrix
        precision: ndarray
            Estimated precision matrix (i.e. pseudo-inverse of
            covariance)
    """
    # Turn matching warnings into exceptions
    warnings.filterwarnings("error")
    if nb_samples_is_sufficient(dataset):
        cov = EmpiricalCovariance().fit(dataset)
        covariance = cov.covariance_
        precision = cov.precision_
        return covariance, precision
    else:
        try:
            cov = GraphicalLasso(mode='cd').fit(dataset)
            covariance = cov.covariance_
            precision = cov.precision_
            return covariance, precision
        except Exception as e:
            lasso_error = str(e)
            raise ValueError(lasso_error +
                             '\nNumber of reference trajectories not '
                             'sufficiently large to estimate covariance '
                             'and precision matrices.')
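nb_samples_is_sufficient is not shown. A plausible placeholder consistent with the docstring's intent (a pure assumption):

def nb_samples_is_sufficient(dataset):
    # Heuristic: trust the unregularized empirical estimate only when
    # there are comfortably more samples than features.
    n_samples, n_features = dataset.shape
    return n_samples >= 2 * n_features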
Example #21
def helper_graphical_lasso(X, theta_true, tf_names=[]):
    # Estimate the covariance
    if args.mode == 'cv':
        model = GraphicalLassoCV()
    else:
        model = GraphicalLasso(alpha=args.alpha_l1,
                               mode=args.mode,
                               tol=1e-7,
                               enet_tol=1e-6,
                               max_iter=100,
                               verbose=False,
                               assume_centered=False)
    model.fit(X)
    #    cov_ = model.covariance_
    prec_ = model.precision_
    if args.USE_TF_NAMES == 'yes' and len(tf_names) != 0:
        prec_ = postprocess_tf(prec_, tf_names)
    recovery_metrics = report_metrics(np.array(theta_true), prec_)
    print(
        'GLASSO: FDR, TPR, FPR, SHD, nnz_true, nnz_pred, precision, recall, Fb, aupr, auc'
    )
    print('GLASSO: TEST: Recovery of true theta: ',
          *np.around(recovery_metrics, 3))
    return list(recovery_metrics)
Example #22
def get_conn_matrix(time_series, conn_model, dir_path, node_size, smooth, dens_thresh, network, ID, roi, min_span_tree,
                    disp_filt, parc, prune, atlas, uatlas, labels, coords, norm, binary,
                    hpass, extract_strategy):
    """
    Computes a functional connectivity matrix based on a node-extracted time-series array.
    Includes a library of routines across Nilearn, scikit-learn, and skggm packages, among others.

    Parameters
    ----------
    time_series : array
        2D m x n array consisting of the time-series signal for each ROI node where m = number of scans and
        n = number of ROI's.
    conn_model : str
       Connectivity estimation model (e.g. corr for correlation, cov for covariance, sps for precision covariance,
       partcorr for partial correlation). sps type is used by default.
    dir_path : str
        Path to directory containing subject derivative data for given run.
    node_size : int
        Spherical centroid node size in the case that coordinate-based centroids
        are used as ROI's.
    smooth : int
        Smoothing width (mm fwhm) to apply to time-series when extracting signal from ROI's.
    dens_thresh : bool
        Indicates whether a target graph density is to be used as the basis for
        thresholding.
    network : str
        Resting-state network based on Yeo-7 and Yeo-17 naming (e.g. 'Default') used to filter nodes in the study of
        brain subgraphs.
    ID : str
        A subject id or other unique identifier.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    min_span_tree : bool
        Indicates whether local thresholding from the Minimum Spanning Tree
        should be used.
    disp_filt : bool
        Indicates whether local thresholding using a disparity filter and
        'backbone network' should be used.
    parc : bool
        Indicates whether to use parcels instead of coordinates as ROI nodes.
    prune : bool
        Indicates whether to prune final graph of disconnected nodes/isolates.
    atlas : str
        Name of atlas parcellation used.
    uatlas : str
        File path to atlas parcellation Nifti1Image in MNI template space.
    labels : list
        List of string labels corresponding to ROI nodes.
    coords : list
        List of (x, y, z) tuples corresponding to a coordinate atlas used or
        which represent the center-of-mass of each parcellation node.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    hpass : bool
        High-pass filter values (Hz) to apply to node-extracted time-series.
    extract_strategy : str 
        The name of a valid function used to reduce the time-series region extraction.

    Returns
    -------
    conn_matrix : array
        Adjacency matrix stored as an m x n array of nodes and edges.
    conn_model : str
       Connectivity estimation model (e.g. corr for correlation, cov for covariance, sps for precision covariance,
       partcorr for partial correlation). sps type is used by default.
    dir_path : str
        Path to directory containing subject derivative data for given run.
    node_size : int
        Spherical centroid node size in the case that coordinate-based centroids
        are used as ROI's for tracking.
    smooth : int
        Smoothing width (mm fwhm) to apply to time-series when extracting signal from ROI's.
    dens_thresh : bool
        Indicates whether a target graph density is to be used as the basis for
        thresholding.
    network : str
        Resting-state network based on Yeo-7 and Yeo-17 naming (e.g. 'Default') used to filter nodes in the study of
        brain subgraphs.
    ID : str
        A subject id or other unique identifier.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    min_span_tree : bool
        Indicates whether local thresholding from the Minimum Spanning Tree
        should be used.
    disp_filt : bool
        Indicates whether local thresholding using a disparity filter and
        'backbone network' should be used.
    parc : bool
        Indicates whether to use parcels instead of coordinates as ROI nodes.
    prune : bool
        Indicates whether to prune final graph of disconnected nodes/isolates.
    atlas : str
        Name of atlas parcellation used.
    uatlas : str
        File path to atlas parcellation Nifti1Image in MNI template space.
    labels : list
        List of string labels corresponding to graph nodes.
    coords : list
        List of (x, y, z) tuples corresponding to a coordinate atlas used or
        which represent the center-of-mass of each parcellation node.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    hpass : bool
        High-pass filter values (Hz) to apply to node-extracted time-series.
    extract_strategy : str 
        The name of a valid function used to reduce the time-series region extraction.

    References
    ----------
    .. [1] Varoquaux, G., & Craddock, R. C. (2013). Learning and comparing functional connectomes
      across subjects. NeuroImage. https://doi.org/10.1016/j.neuroimage.2013.04.007
    .. [2] Jason Laska, Manjari Narayan, 2017. skggm 0.2.7: A scikit-learn compatible package
      for Gaussian and related Graphical Models. doi:10.5281/zenodo.830033

    """
    from nilearn.connectome import ConnectivityMeasure
    from sklearn.covariance import GraphicalLassoCV

    conn_matrix = None
    if conn_model == 'corr' or conn_model == 'cor' or conn_model == 'correlation':
        # credit: nilearn
        print('\nComputing correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'partcorr' or conn_model == 'parcorr' or conn_model == 'partialcorrelation':
        # credit: nilearn
        print('\nComputing partial correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'cov' or conn_model == 'covariance' or conn_model == 'covar' or conn_model == 'sps' or \
        conn_model == 'sparse' or conn_model == 'precision':
        # Fit estimator to matrix to get sparse matrix
        estimator_shrunk = None
        estimator = GraphicalLassoCV(cv=5)
        try:
            print('\nComputing covariance...\n')
            estimator.fit(time_series)
        except:
            print('Unstable Lasso estimation--Attempting to re-run by first applying shrinkage...')
            try:
                from sklearn.covariance import GraphicalLasso, empirical_covariance, shrunk_covariance
                emp_cov = empirical_covariance(time_series)
                for i in np.arange(0.8, 0.99, 0.01):
                    shrunk_cov = shrunk_covariance(emp_cov, shrinkage=i)
                    alphaRange = 10.0 ** np.arange(-8, 0)
                    for alpha in alphaRange:
                        try:
                            estimator_shrunk = GraphicalLasso(alpha)
                            estimator_shrunk.fit(shrunk_cov)
                            print(f"Retrying covariance matrix estimate with alpha={alpha}")
                            if estimator_shrunk is None:
                                pass
                            else:
                                break
                        except:
                            print(f"Covariance estimation failed with shrinkage at alpha={alpha}")
                            continue
            except ValueError:
                print('Unstable Lasso estimation! Shrinkage failed. A different connectivity model may be needed.')
        if estimator is None and estimator_shrunk is None:
            raise RuntimeError('\nERROR: Covariance estimation failed.')
        if conn_model == 'sps' or conn_model == 'sparse' or conn_model == 'precision':
            if estimator_shrunk is None:
                print('\nFetching precision matrix from covariance estimator...\n')
                conn_matrix = -estimator.precision_
            else:
                print('\nFetching shrunk precision matrix from covariance estimator...\n')
                conn_matrix = -estimator_shrunk.precision_
        elif conn_model == 'cov' or conn_model == 'covariance' or conn_model == 'covar':
            if estimator_shrunk is None:
                print('\nFetching covariance matrix from covariance estimator...\n')
                conn_matrix = estimator.covariance_
            else:
                conn_matrix = estimator_shrunk.covariance_
    elif conn_model == 'QuicGraphicalLasso':

        try:
            from inverse_covariance import QuicGraphicalLasso
        except ImportError:
            print('Cannot run QuicGraphLasso. Skggm not installed!')

        # Compute the sparse inverse covariance via QuicGraphLasso
        # credit: skggm
        model = QuicGraphicalLasso(
            init_method='cov',
            lam=0.5,
            mode='default',
            verbose=1)
        print('\nCalculating QuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphicalLassoCV':
        try:
            from inverse_covariance import QuicGraphicalLassoCV
        except ImportError:
            print('Cannot run QuicGraphLassoCV. Skggm not installed!')

        # Compute the sparse inverse covariance via QuicGraphLassoCV
        # credit: skggm
        model = QuicGraphicalLassoCV(
            init_method='cov',
            verbose=1)
        print('\nCalculating QuicGraphLassoCV precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphicalLassoEBIC':
        try:
            from inverse_covariance import QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run QuicGraphLassoEBIC. Skggm not installed!')

        # Compute the sparse inverse covariance via QuicGraphLassoEBIC
        # credit: skggm
        model = QuicGraphicalLassoEBIC(
            init_method='cov',
            verbose=1)
        print('\nCalculating QuicGraphLassoEBIC precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'AdaptiveQuicGraphicalLasso':
        try:
            from inverse_covariance import AdaptiveQuicGraphicalLasso, QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run AdaptiveGraphLasso. Skggm not installed!')

        # Compute the sparse inverse covariance via
        # AdaptiveGraphLasso + QuicGraphLassoEBIC + method='binary'
        # credit: skggm
        model = AdaptiveQuicGraphicalLasso(
            estimator=QuicGraphicalLassoEBIC(
                init_method='cov',
            ),
            method='binary',
        )
        print('\nCalculating AdaptiveQuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.estimator_.precision_
    else:
        raise ValueError('\nERROR! No connectivity model specified at runtime. Select a valid estimator using the '
                         '-mod flag.')

    # Enforce symmetry
    conn_matrix = np.maximum(conn_matrix, conn_matrix.T)

    if conn_matrix.shape < (2, 2):
        raise RuntimeError('\nERROR! Matrix estimation selection yielded an empty or 1-dimensional graph. '
                           'Check time-series for errors or try using a different atlas')

    coords = np.array(coords)
    labels = np.array(labels)

    del time_series

    return (conn_matrix, conn_model, dir_path, node_size, smooth, dens_thresh, network, ID, roi, min_span_tree,
            disp_filt, parc, prune, atlas, uatlas, labels, coords, norm, binary, hpass, extract_strategy)
Example #23
import numpy as np
import pandas as pd
import time

from sklearn.covariance import GraphicalLasso

seed = int(snakemake.wildcards["replicate"])
np.random.seed(seed)
data = snakemake.input["data"]
filename = snakemake.output["adjmat"]
df = pd.read_csv(data)

X = df.values

start = time.perf_counter()
cov = GraphicalLasso(alpha=float(snakemake.wildcards["alpha"]),
                     mode=snakemake.wildcards["mode"],
                     tol=float(snakemake.wildcards["tol"]),
                     enet_tol=float(snakemake.wildcards["enet_tol"]),
                     max_iter=int(snakemake.wildcards["max_iter"]),
                     verbose=bool(snakemake.wildcards["verbose"]),
                     assume_centered=bool(
                         snakemake.wildcards["assume_centered"])).fit(X)

#adjmat = np.around(np.abs(cov.precision_), decimals=3)
adjmat = ((np.around(np.abs(cov.precision_), decimals=3) > float(
    snakemake.wildcards["precmat_threshold"])) * 1 -
          np.identity(X.shape[1])).astype(int)

tottime = time.perf_counter() - start

time_filename = snakemake.output["time"]
np.savetxt(time_filename, [tottime])
dfadj = pd.DataFrame(adjmat)
dfadj.columns = df.columns
Example #24
        Omega_0 = Omega_sol.copy()
        Theta_0 = Theta_sol.copy()

        AIC[g1, g2] = aic(S, Theta_sol, n.mean())
        BIC[g1, g2] = ebic(S, Theta_sol, n.mean(), gamma=0.1)

ix = np.unravel_index(np.nanargmin(BIC), BIC.shape)
ix2 = np.unravel_index(np.nanargmin(AIC), AIC.shape)
lambda1 = L1[ix]
lambda2 = L2[ix]

print("Optimal lambda values: (l1,l2) = ", (lambda1, lambda2))

#%%
singleGL = GraphicalLasso(alpha=1.5 * lambda1,
                          tol=1e-2,
                          max_iter=4000,
                          verbose=True)

res = np.zeros((K, p, p))
for k in np.arange(K):
    #model = quic.fit(S[k,:,:], verbose = 1)
    model = singleGL.fit(samples[k, :, :])

    res[k, :, :] = model.precision_

results['GLASSO'] = {'Theta': res}

#%%
start = time()
sol, info = ADMM_MGL(S, lambda1, lambda2, reg, Omega_0, rho = 1, max_iter = 100, \
                                                        eps_admm = 1e-5, verbose = True, measure = True)
Example #25
def block_glasso(data, eps=1e-8, COLLECT=True):
    criterion = nn.MSELoss()  # input, target
    theta_true, X = data

    # #############################################################################
    # Estimate the covariance
    print('Using the lars method')
    S = np.dot(X.T, X) / args.M
    #    model = GraphicalLassoCV(cv=2, alphas=5, n_refinements=5, tol=1e-6,
    #                              max_iter=100, mode='lars', n_jobs=-1)
    model = GraphicalLasso(alpha=args.rho,
                           mode='lars',
                           tol=1e-7,
                           enet_tol=1e-6,
                           max_iter=args.MAX_EPOCH,
                           verbose=True,
                           assume_centered=True)
    #    model = GraphLasso(alpha=args.rho, mode='lars', tol=1e-8, enet_tol=1e-6,
    #               max_iter=100, verbose=False, assume_centered=False)

    #    print('Using the cd method')
    #    model = GraphicalLassoCV(cv=2, alphas=5, n_refinements=5, tol=1e-6,
    #                              max_iter=100, mode='cd', n_jobs=-1)
    model.fit(X)
    cov_ = model.covariance_
    theta_pred = model.precision_
    # #############################################################################
    fdr, tpr, fpr, shd, nnz, nnz_true, ps = metrics.report_metrics(
        theta_true, theta_pred)
    cond_theta_pred, cond_theta_true = np.linalg.cond(
        theta_pred), np.linalg.cond(theta_true)
    num_itr = model.n_iter_
    rho_obtained = args.rho  # the L1 penalty parameter
    print('Accuracy metrics: fdr ', fdr, ' tpr ', tpr, ' fpr ', fpr, ' shd ',
          shd, ' nnz ', nnz, ' nnz_true ', nnz_true, ' sign_match ', ps,
          ' pred_cond ', cond_theta_pred, ' true_cond ', cond_theta_true,
          'total itr: ', num_itr, ' penalty_rho: ', rho_obtained)

    # Getting the NMSE and objective value
    # results of convergence
    res_conv = []

    theta_true = convert_to_torch(theta_true, TESTING_FLAG=True)
    theta_pred = convert_to_torch(theta_pred, TESTING_FLAG=True)
    S = convert_to_torch(S, TESTING_FLAG=True)
    obj_true = get_obj_val(theta_true, S)
    if COLLECT:
        theta_pred_diag = torch.diag_embed(
            torch.diagonal(theta_pred, offset=0, dim1=-2, dim2=-1))
        theta_true_diag = torch.diag_embed(
            torch.diagonal(theta_true, offset=0, dim1=-2, dim2=-1))
        cv_loss, cv_loss_off_diag, obj_pred = get_convergence_loss(
            theta_pred, theta_true), get_convergence_loss(
                theta_pred - theta_pred_diag,
                theta_true - theta_true_diag), get_obj_val(theta_pred, S)
        res_conv.append([cv_loss, obj_pred, obj_true, cv_loss_off_diag])

    return [
        fdr, tpr, fpr, shd, nnz, nnz_true, ps, cond_theta_pred,
        cond_theta_true, num_itr, rho_obtained
    ], res_conv  # result of convergence
Example #26
def get_conn_matrix(time_series, conn_model, dir_path, node_size, smooth,
                    dens_thresh, network, ID, roi, min_span_tree, disp_filt,
                    parc, prune, atlas_select, uatlas_select, label_names,
                    coords, c_boot, norm, binary):
    from nilearn.connectome import ConnectivityMeasure
    from sklearn.covariance import GraphicalLassoCV

    conn_matrix = None
    if conn_model == 'corr' or conn_model == 'cor' or conn_model == 'correlation':
        # credit: nilearn
        print('\nComputing correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'partcorr' or conn_model == 'parcorr' or conn_model == 'partialcorrelation':
        # credit: nilearn
        print('\nComputing partial correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'cov' or conn_model == 'covariance' or conn_model == 'covar' or conn_model == 'sps' or conn_model == 'sparse' or conn_model == 'precision':
        # Fit estimator to matrix to get sparse matrix
        estimator_shrunk = None
        estimator = GraphicalLassoCV(cv=5)
        try:
            print('\nComputing covariance...\n')
            estimator.fit(time_series)
        except:
            print(
                'Unstable Lasso estimation--Attempting to re-run by first applying shrinkage...'
            )
            try:
                from sklearn.covariance import GraphicalLasso, empirical_covariance, shrunk_covariance
                emp_cov = empirical_covariance(time_series)
                for i in np.arange(0.8, 0.99, 0.01):
                    shrunk_cov = shrunk_covariance(emp_cov, shrinkage=i)
                    alphaRange = 10.0**np.arange(-8, 0)
                    for alpha in alphaRange:
                        try:
                            estimator_shrunk = GraphicalLasso(alpha)
                            estimator_shrunk.fit(shrunk_cov)
                            print(
                                "Retrying covariance matrix estimate with alpha=%s"
                                % alpha)
                            if estimator_shrunk is None:
                                pass
                            else:
                                break
                        except:
                            print(
                                "Covariance estimation failed with shrinkage at alpha=%s"
                                % alpha)
                            continue
            except ValueError:
                print(
                    'Unstable Lasso estimation! Shrinkage failed. A different connectivity model may be needed.'
                )
        if estimator is None and estimator_shrunk is None:
            raise RuntimeError('\nERROR: Covariance estimation failed.')
        if conn_model == 'sps' or conn_model == 'sparse' or conn_model == 'precision':
            if estimator_shrunk is None:
                print(
                    '\nFetching precision matrix from covariance estimator...\n'
                )
                conn_matrix = -estimator.precision_
            else:
                print(
                    '\nFetching shrunk precision matrix from covariance estimator...\n'
                )
                conn_matrix = -estimator_shrunk.precision_
        elif conn_model == 'cov' or conn_model == 'covariance' or conn_model == 'covar':
            if estimator_shrunk is None:
                print(
                    '\nFetching covariance matrix from covariance estimator...\n'
                )
                conn_matrix = estimator.covariance_
            else:
                conn_matrix = estimator_shrunk.covariance_
    elif conn_model == 'QuicGraphicalLasso':
        try:
            from inverse_covariance import QuicGraphicalLasso
        except ImportError:
            print('Cannot run QuicGraphLasso. Skggm not installed!')

        # Compute the sparse inverse covariance via QuicGraphLasso
        # credit: skggm
        model = QuicGraphicalLasso(init_method='cov',
                                   lam=0.5,
                                   mode='default',
                                   verbose=1)
        print('\nCalculating QuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphLassoCV':
        try:
            from inverse_covariance import QuicGraphicalLassoCV
        except ImportError:
            print('Cannot run QuicGraphLassoCV. Skggm not installed!')

        # Compute the sparse inverse covariance via QuicGraphLassoCV
        # credit: skggm
        model = QuicGraphicalLassoCV(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoCV precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphicalLassoEBIC':
        try:
            from inverse_covariance import QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run QuicGraphLassoEBIC. Skggm not installed!')

        # Compute the sparse inverse covariance via QuicGraphLassoEBIC
        # credit: skggm
        model = QuicGraphicalLassoEBIC(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoEBIC precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'AdaptiveQuicGraphLasso':
        try:
            from inverse_covariance import AdaptiveQuicGraphicalLasso, QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run AdaptiveGraphLasso. Skggm not installed!')

        # Compute the sparse inverse covariance via
        # AdaptiveGraphLasso + QuicGraphLassoEBIC + method='binary'
        # credit: skggm
        model = AdaptiveQuicGraphicalLasso(
            estimator=QuicGraphicalLassoEBIC(init_method='cov', ),
            method='binary',
        )
        print(
            '\nCalculating AdaptiveQuicGraphLasso precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.estimator_.precision_
    else:
        raise ValueError(
            '\nERROR! No connectivity model specified at runtime. Select a valid estimator using the '
            '-mod flag.')

    if conn_matrix.shape < (2, 2):
        raise RuntimeError(
            '\nERROR! Matrix estimation selection yielded an empty or 1-dimensional graph. '
            'Check time-series for errors or try using a different atlas')

    coords = np.array(coords)
    label_names = np.array(label_names)
    return conn_matrix, conn_model, dir_path, node_size, smooth, dens_thresh, network, ID, roi, min_span_tree, disp_filt, parc, prune, atlas_select, uatlas_select, label_names, coords, c_boot, norm, binary
Example #27
    GFPR[l] = FPR[gix]

plot_gamma_influence(gammas, GTPR, GFPR, save=False)

#%%
# solve single GLASSO
ALPHA = 2 * np.logspace(start=-3, stop=-1, num=15, base=10)

FPR_GL = np.zeros(len(ALPHA))
TPR_GL = np.zeros(len(ALPHA))
DFPR_GL = np.zeros(len(ALPHA))
DTPR_GL = np.zeros(len(ALPHA))

for a in np.arange(len(ALPHA)):
    singleGL = GraphicalLasso(alpha=ALPHA[a],
                              tol=1e-6,
                              max_iter=200,
                              verbose=False)
    singleGL_sol = np.zeros((K, p, p))
    for k in np.arange(K):
        #model = quic.fit(S[k,:,:], verbose = 1)
        model = singleGL.fit(sample[k, :, :].T)
        singleGL_sol[k, :, :] = model.precision_

    TPR_GL[a] = discovery_rate(singleGL_sol, Theta)['TPR']
    FPR_GL[a] = discovery_rate(singleGL_sol, Theta)['FPR']
    DTPR_GL[a] = discovery_rate(singleGL_sol, Theta)['TPR_DIFF']
    DFPR_GL[a] = discovery_rate(singleGL_sol, Theta)['FPR_DIFF']

#%%
# solve again for optimal (l1, l2)
l1opt = L1[ix]
Example #28
    for l in ls:
        ns = nitk.NeighbourhoodSelection(l)
        ns.fit(X)
        tpr, fpr, prec = nitk.methods.calculate_matrix_accuracy(
            K, ns.precision_)
        neighbourhood_selection_tpr.append(tpr)
        neighbourhood_selection_fpr.append(fpr)
        neighbourhood_selection_precision.append(prec)

    glasso_tpr = []
    glasso_fpr = []
    glasso_precision = []

    for l in ls:
        try:
            gl = GraphicalLasso(l)
            gl.fit(X)
            tpr, fpr, prec = nitk.methods.calculate_matrix_accuracy(
                K, gl.precision_)
            glasso_tpr.append(tpr)
            glasso_fpr.append(fpr)
            glasso_precision.append(prec)
        except FloatingPointError as e:
            print(e)

    space_tpr = []
    space_fpr = []
    space_precision = []

    for l in ls:
        s = nitk.SPACE(l)
Example #29
        for _ in range(self.max_iter):
            X = self.update_X(Y, Z, cov)
            Y = self.soft_threshold(X + Z, self.alpha / self.rho)
            Z = Z + self.alpha * (X - Y)

        self.cov = np.linalg.inv(Y)
        self.precision = Y
        return self
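update_X and soft_threshold are defined elsewhere on this class. Standard graphical-lasso ADMM versions, sketched with assumed signatures:

import numpy as np

def soft_threshold(A, kappa):
    # Element-wise soft-thresholding: sign(a) * max(|a| - kappa, 0).
    return np.sign(A) * np.maximum(np.abs(A) - kappa, 0.0)

def update_X(Y, Z, S, rho):
    # Closed-form X-update for
    #   argmin_X  -log det X + tr(S X) + (rho/2) ||X - Y + Z||_F^2,
    # via the eigendecomposition of rho * (Y - Z) - S.
    w, Q = np.linalg.eigh(rho * (Y - Z) - S)
    x = (w + np.sqrt(w ** 2 + 4.0 * rho)) / (2.0 * rho)
    return (Q * x) @ Q.T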


if __name__ == "__main__":
    A = load_boston().data  # note: load_boston was removed in scikit-learn 1.2
    A = sp.stats.zscore(A, axis=0)

    # ---sklearn---
    model = GraphicalLasso(alpha=0.4, verbose=True)
    model.fit(A)

    cov = np.cov(A.T)
    cov_ = model.covariance_
    pre_ = model.precision_
    model = GraphicalLassoADMM()
    res = model.fit(A)
    #print(res.precision)
    #print(cov_)

    # Ordinary covariance matrix
    plt.imshow(cov, interpolation='nearest', vmin=0, vmax=1, cmap='jet')
    plt.colorbar()
    plt.figure()
Example #30
    def fit(self, X, y):
        """Fit the QDA to the training data"""
        methods = [
            None, 'nonpara', "fr", "kl", "mean", "wass", "reg", "freg",
            "sparse", "kl_new"
        ]
        rules = ["qda", "da", "fda"]
        if self.method not in methods:
            raise ValueError("method must be in {}; got (method={})".format(
                methods, self.method))
        if self.rule not in rules:
            raise ValueError("rule must be in {}; got (rule={})".format(
                rules, self.rule))
        X, y = check_X_y(X, y)

        self.labels_, self.n_samples_ = np.unique(y, return_counts=True)
        self.n_class_ = self.labels_.size
        n_samples, self.n_features_ = X.shape
        self.rho_ = np.array([self.rho]).ravel()
        if self.rho == -1:
            chi_quantile = chi2.ppf(
                0.5,
                self.n_features_ * (self.n_features_ + 3) / 2)
            self.rho_ = chi_quantile * np.ones(self.n_class_) / self.n_samples_
        else:
            if self.rho_.size == 1:
                self.rho_ = self.rho_[0] * np.ones(self.n_class_)
            if self.adaptive:
                self.rho_ *= np.sqrt(self.n_features_)
        # PRINT!!!!
        #print(self.n_features_,  chi_quantile,self.n_samples_,self.rho_)
        if self.priors is None:
            self.priors_ = self.n_samples_ / n_samples
        else:
            self.priors_ = self.priors

        self.mean_ = []
        self.covariance_ = []
        self.cov_sqrt_ = []
        self.prec_ = []
        self.prec_sqrt_ = []
        self.logdet_ = []
        self.rotations_ = []
        self.scalings_ = []
        for n_c, label in enumerate(self.labels_):
            mask = (y == label)
            X_c = X[mask, :]
            X_c_mean = np.mean(X_c, 0)
            X_c_bar = X_c - X_c_mean
            U, s, Vt = np.linalg.svd(X_c_bar, full_matrices=False)
            s2 = (s**2) / (len(X_c_bar) - 1)
            self.mean_.append(X_c_mean)
            if self.method == 'reg':
                s2 += self.rho_[n_c]
                inv_s2 = 1 / s2
            elif self.method in [
                    'fr', 'kl', 'mean', 'freg', 'kl_new', 'nonpara'
            ]:
                sc = StandardScaler()
                X_c_ = sc.fit_transform(X_c)
                cov_c = ledoit_wolf(X_c_)[0]
                cov_c = sc.scale_[:, np.newaxis] * cov_c * sc.scale_[
                    np.newaxis, :]
                s2, V = np.linalg.eigh(cov_c)
                s2 = np.abs(s2)
                inv_s2 = 1 / s2
                Vt = V.T
            elif self.method == 'sparse':
                try:
                    cov_c = GraphicalLasso(alpha=self.rho_[n_c]).fit(X_c_bar)
                    cov_c = cov_c.covariance_
                except:
                    tol = self.tol * 1e6
                    cov_c = graphical_lasso(
                        np.dot(((1 - tol) * s2 + tol) * Vt.T, Vt),
                        self.rho_[n_c])[0]
                s2, V = np.linalg.eigh(cov_c)
                s2 = np.abs(s2)
                inv_s2 = 1 / s2
                Vt = V.T
            elif self.method == 'wass':
                f = lambda gamma: gamma * (self.rho_[n_c] ** 2 - 0.5 * np.sum(s2)) - self.n_features_ + \
                                  0.5 * (np.sum(np.sqrt((gamma ** 2) * (s2 ** 2) + 4 * s2 * gamma)))
                lb = 0
                gamma_0 = 0
                ub = np.sum(np.sqrt(1 / (s2 + self.tol))) / self.rho_[n_c]
                f_ub = f(ub)
                for bsect in range(100):
                    gamma_0 = 0.5 * (ub + lb)
                    f_gamma_0 = f(gamma_0)
                    if f_ub * f_gamma_0 > 0:
                        ub = gamma_0
                        f_ub = f_gamma_0
                    else:
                        lb = gamma_0
                    if abs(ub - lb) < self.tol:
                        break
                inv_s2 = gamma_0 * (1 - 2 / (1 + np.sqrt(1 + 4 /
                                                         (gamma_0 *
                                                          (s2 + self.tol)))))
                s2 = 1 / (inv_s2 + self.tol)
            else:
                s2 += self.tol
                inv_s2 = 1 / s2
            self.covariance_.append(np.dot(s2 * Vt.T, Vt))
            self.cov_sqrt_.append(np.dot(np.sqrt(s2) * Vt.T, Vt))
            self.prec_.append(np.dot(inv_s2 * Vt.T, Vt))
            self.prec_sqrt_.append(np.dot(np.sqrt(inv_s2) * Vt.T, Vt))
            self.logdet_.append(np.log(s2).sum())
            #print(self.logdet_)
            self.rotations_.append(Vt)
            self.scalings_.append(s2)
        return self
Example #31
def get_covariance(data,
                   method,
                   lambda_val='CV',
                   do_scale=False,
                   n_cv_folds=None):

    # default cov if it is not calculated properly
    cov = -1

    # scale timecourse
    if do_scale:

        data = scale(data, axis=1)

    # select method
    if method == 'QUIC':

        if lambda_val == 'CV':

            # set up model
            model = QuicGraphicalLassoCV(cv=n_cv_folds)

            # fit data to model and return resulting covariance
            model.fit(np.transpose(data))
            return model.covariance_

        elif lambda_val == 'EBIC':

            # set up model
            model = QuicGraphicalLassoEBIC()

            # fit data to model and return resulting covariance
            model.fit(np.transpose(data))
            return model.covariance_

        elif isinstance(lambda_val,
                        float) and lambda_val > 0 and lambda_val < 1:

            # set up model
            model = QuicGraphicalLasso(lam=lambda_val)

            # fit data to model and return resulting covariance
            model.fit(data)
            return model.covariance_

        else:

            print('Error in QUIC covariance:')
            print(
                'lambda_val must be a float between 0 and 1, "CV" to find the best value by cross-validation, or "EBIC" to use extended Bayesian information criterion for model selection.'
            )

    elif method == 'graphLasso':

        # transpose data as graphLasso likes it this way round
        data = np.transpose(data)

        # select whether to use supplied regularisation parameter or find the
        # best regularisation parameter by cross validation and maximum likelihood
        # use scikit-learn implementation of graph lasso and CV graph lasso
        if lambda_val == 'CV':

            try:

                model = GraphicalLassoCV(max_iter=1500,
                                         cv=n_cv_folds,
                                         assume_centered=True)
                model.fit(data)
                cov = model.covariance_

            except:

                print(
                    'An error in the cross-validated graphLasso calculation occurred.'
                )

        elif isinstance(lambda_val,
                        float) and lambda_val > 0 and lambda_val < 1:

            try:

                model = GraphicalLasso(alpha=lambda_val,
                                       mode='cd',
                                       tol=0.0001,
                                       max_iter=1500,
                                       verbose=False)
                model.fit(data)
                cov = model.covariance_

            except FloatingPointError as e:

                print(
                    'A floating point error in the graphLasso calculation occurred.'
                )
                print(e)

        else:

            print('Error in graphLasso covariance:')
            print(
                'lambda_val must be a float between 0 and 1, or "CV" to find the best value by cross-validation'
            )

    # select method
    else:

        print('Method must be one of "graphLasso" or "QUIC".')

    return cov
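A hedged usage sketch (synthetic timecourses; the graphLasso branch transposes internally, so data is variables x timepoints):

import numpy as np

rng = np.random.default_rng(0)
data = rng.normal(size=(10, 300))  # 10 variables x 300 timepoints
cov = get_covariance(data, method='graphLasso', lambda_val=0.1)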