def make_sparse_low_rank(n_dim_obs=3,
                         n_dim_lat=2,
                         T=10,
                         epsilon=1e-3,
                         n_samples=50,
                         **kwargs):
    """Generate cumulative sparse / low-rank matrix sequences.

    The first entry of each sequence comes from one ``make_sparse_spd_matrix``
    draw and one ``make_low_rank_matrix`` draw (``effective_rank=n_dim_lat``).
    For each of the remaining ``T - 1`` steps a fresh draw of each kind is
    added to the running matrices.

    ``epsilon``, ``n_samples`` and ``**kwargs`` are accepted but not used.

    Returns
    -------
    (Ks, Kobs, Ls) : tuple of lists
        The running sparse matrices, the differences ``K - L`` and the
        running low-rank matrices.
    """
    from sklearn.datasets import make_low_rank_matrix, make_sparse_spd_matrix

    def _next_increment():
        # The sparse draw always precedes the low-rank draw.
        sparse_inc = make_sparse_spd_matrix(n_dim_obs)
        low_rank_inc = make_low_rank_matrix(
            n_dim_obs, n_dim_obs, effective_rank=n_dim_lat)
        return sparse_inc, low_rank_inc

    sparse_sum, low_rank_sum = _next_increment()
    sparse_seq = [sparse_sum]
    low_rank_seq = [low_rank_sum]
    observed_seq = [sparse_sum - low_rank_sum]

    for _ in range(T - 1):
        sparse_inc, low_rank_inc = _next_increment()
        sparse_sum = sparse_sum + sparse_inc
        low_rank_sum = low_rank_sum + low_rank_inc

        sparse_seq.append(sparse_sum)
        low_rank_seq.append(low_rank_sum)
        observed_seq.append(sparse_sum - low_rank_sum)

    return sparse_seq, observed_seq, low_rank_seq
Пример #2
0
def get_sparse_high_correlations(dim=25,
                                 seed=1,
                                 rep_num=1000,
                                 sparsity_alpha=0.9):
    """Gets a sparse inverse covariance matrix with strong correlations.

    The method draws ``rep_num`` candidate matrices and returns the one whose
    implied correlation matrix (the inverse, rescaled to unit diagonal) has
    the highest mean absolute value over its lower triangle.

    Args:
      dim: the dimension of the matrix to be returned.
      seed: seed for reproducibility (passed to ``np.random.seed``).
      rep_num: number of matrices to draw and choose from; must be >= 1.
      sparsity_alpha: sparsity parameter, see make_sparse_spd_matrix.

    Returns:
      A sparse inverse covariance matrix.

    Raises:
      ValueError: if ``rep_num`` is smaller than 1, since there would be no
        candidate to return.
    """
    if rep_num < 1:
        raise ValueError(
            "rep_num must be at least 1, got {!r}".format(rep_num))

    np.random.seed(seed)
    max_mean = 0
    for _ in range(rep_num):
        candidate_matrix = make_sparse_spd_matrix(dim,
                                                  alpha=sparsity_alpha,
                                                  smallest_coef=.4,
                                                  largest_coef=.7)
        # Turn the implied covariance into a correlation matrix by dividing
        # columns and then rows by the standard deviations.
        candidate_correlations = np.linalg.inv(candidate_matrix)
        diag_part = np.sqrt(
            np.expand_dims(np.diag(candidate_correlations), axis=0))
        candidate_correlations /= diag_part
        candidate_correlations /= diag_part.transpose()
        # The unit diagonal is part of the lower triangle, so the score of a
        # well-formed candidate is strictly positive and the first candidate
        # always replaces the initial max_mean of 0.
        cur_mean = np.tril(np.abs(candidate_correlations)).mean()
        if max_mean < cur_mean:
            best_candidate = candidate_matrix
            max_mean = cur_mean
    return best_candidate
Пример #3
0
def test_matrix(n, sparse=False, d=-0.5):
    """
    Returns matrices on which to test algorithms

    Inputs:

        n: int, matrix size

        sparse: bool (False), if True return a sparse matrix in CSC format,
            otherwise a dense one.

        d: double (-0.5), constant added to every entry of the random
            diagonal matrix that sits between the two singular-vector
            factors (dense case only).

    Output:

        A: n-by-n matrix. In the sparse case it is the output of
            make_sparse_spd_matrix converted with sp.csc_matrix. In the
            dense case it is U * (d + diag(r)) * V with U, V taken from the
            SVD of a random Gram matrix and r uniform random; because d is
            added to all entries, positive definiteness is not guaranteed.
    """
    if sparse:
        #
        # Sparse matrix
        #
        # The size is passed positionally: newer scikit-learn releases
        # renamed the ``dim`` keyword to ``n_dim``, while the positional
        # form works with both.
        A = make_sparse_spd_matrix(n, alpha=0.95, norm_diag=False,
                                   smallest_coef=.1, largest_coef=.9)
        A = sp.csc_matrix(A)
    else:
        #
        # Full matrix
        #
        X = np.random.rand(n, n)
        X = X + X.T
        U, _, V = linalg.svd(np.dot(X.T, X))
        A = np.dot(np.dot(U, d + np.diag(np.random.rand(n))), V)

    return A
Пример #4
0
def gm_params_generator(d, k, sparse_proba=None, alpha=5, min_center_dist=None):
    """
    Generate random parameters of a k-component Gaussian mixture in d dimensions.

    Centers are drawn uniformly in [-alpha/2, alpha/2) per coordinate and
    re-drawn until each new center is at least ``min_center_dist`` away from
    all centers accepted so far. alpha is the size of the grid.

    Parameters
    ----------
    d : int
        Dimension of the space.
    k : int
        Number of mixture components.
    sparse_proba : float or None
        If None, each covariance is ``alpha * 1e-2 / k**2 * (I + A A^T)`` for
        a uniform random ``A``. Otherwise each covariance is the inverse of
        ``make_sparse_spd_matrix(d, alpha=sparse_proba)``.
    alpha : float
        Side length of the box the centers are drawn from.
    min_center_dist : float or None
        Minimum distance between centers; defaults to ``alpha / sqrt(k)``.

    Returns
    -------
    weights : ndarray of length k, summing to 1
    centers : list of k arrays of length d
    cov : list of k (d, d) arrays when ``sparse_proba`` is None, otherwise a
        (k, d, d) ndarray

    Notes
    -----
    The rejection loop has no iteration limit: it does not terminate if
    ``min_center_dist`` is too large for k centers to fit in the box.
    """
    #  we scatter the unit square on k squares, the min distance is given by alpha/sqrt(k)
    if min_center_dist is None:
        min_center_dist = alpha / np.sqrt(k)

    def _draw_center():
        return alpha * (np.random.rand(1, d)[0] - 0.5)

    def _too_close(candidate):
        distances = np.linalg.norm(np.array(centers) - candidate, axis=1)
        return bool(np.any(distances < min_center_dist))

    centers = [_draw_center()]
    for _ in range(k - 1):
        center = _draw_center()
        while _too_close(center):
            center = _draw_center()
        centers.append(center)

    # if sparse_proba is set :
    #    generate covariance matrix with the possibility to set the sparsity on the precision matrix,
    # we multiply by 1/k^2 to avoid overlapping
    if sparse_proba is None:
        # np.random is used here like everywhere else in this function; the
        # bare ``random.rand`` of the earlier version is not part of the
        # standard ``random`` module.
        factors = [np.random.rand(d, d) for _ in range(k)]
        cov = [alpha * 1e-2 / (k ** 2) * (np.eye(d) + np.dot(a, a.transpose()))
               for a in factors]
    else:
        cov = np.array([np.linalg.inv(make_sparse_spd_matrix(d, alpha=sparse_proba))
                        for _ in range(k)])
    p = np.random.randint(1000, size=(1, k))[0]
    weights = 1.0 * p / p.sum()
    return weights, centers, cov
Пример #5
0
    def prototype_adjacency(self, n_block_features, alpha):
        """Build a new graph.

        Doc for ".create(n_features, alpha)"

        Parameters
        -----------
        n_block_features : int
            Matrix size handed to make_sparse_spd_matrix.

        alpha : float (0,1)
            The complexity / sparsity factor.
            This is (1 - alpha_0) in sklearn.datasets.make_sparse_spd_matrix
            where alpha_0 is the probability that a coefficient is zero.

        Returns
        -----------
        The matrix produced by make_sparse_spd_matrix, with coefficient
        bounds self.spd_low / self.spd_high and random state self.prng.
        """
        zero_probability = np.abs(1.0 - alpha)
        spd_options = dict(
            alpha=zero_probability,
            smallest_coef=self.spd_low,
            largest_coef=self.spd_high,
            random_state=self.prng,
        )
        return make_sparse_spd_matrix(n_block_features, **spd_options)
Пример #6
0
 def make_correlation_matrix(asvs,
                             prefix,
                             norm_diag=1,
                             alpha=0.9,
                             smallest_coef=0.1,
                             largest_coef=0.9):
     """Create a correlation matrix: symmetric, positive definite, sparse.

     The matrix has one row/column per entry of ``asvs`` and is also written
     out through ``write_table`` under the title
     ``<prefix>.correlationMatrix`` in the current working directory.

     Parameters
     ----------
     asvs : sequence
         Row and column labels; its length sets the matrix size.
     prefix : str
         Prefix of the output table title.
     norm_diag : truthy/falsy
         Whether to normalize the output matrix to make the leading diagonal
         elements all 1.
     alpha : float
         The probability that a coefficient is zero. Larger values enforce
         more sparsity.
     smallest_coef : float
         The value of the smallest coefficient.
     largest_coef : float
         The value of the largest coefficient.

     Returns
     -------
     The matrix returned by make_sparse_spd_matrix.
     """
     # norm_diag defaults to the integer 1; it is coerced to a real boolean
     # because newer scikit-learn releases validate the parameter type.
     corrMatrix = make_sparse_spd_matrix(len(asvs),
                                         alpha=alpha,
                                         norm_diag=bool(norm_diag),
                                         smallest_coef=smallest_coef,
                                         largest_coef=largest_coef)
     # Called for its side effect (writing the table); the returned data
     # frame is not needed here.
     write_table(corrMatrix,
                 outputDir=os.getcwd(),
                 title='{}.correlationMatrix'.format(prefix),
                 rows=asvs,
                 columns=asvs,
                 dataframe=True)
     return corrMatrix
def gm_params_gen(d, k):
    """Draw random parameters for a k-component Gaussian mixture in d dims.

    Returns ``(weights, centers, cov)``: normalised random weights, integer
    centers drawn with ``np.random.randint(20) - 10`` and a stacked array of
    covariances, each the inverse of a ``make_sparse_spd_matrix(d)`` draw.
    """
    centers = np.random.randint(20, size=(k, d)) - 10

    covariances = []
    for _ in range(k):
        precision = make_sparse_spd_matrix(d)
        covariances.append(np.linalg.inv(precision))
    cov = np.array(covariances)

    counts = np.random.randint(1000, size=(1, k))[0]
    weights = 1.0 * counts / counts.sum()
    return weights, centers, cov
Пример #8
0
def random_er_network(n_features, alpha,random_state=np.random.RandomState(1)):
    """Return a random sparse SPD matrix whose coefficient bounds are both 0.7.

    ``alpha`` is the probability that a coefficient is zero.

    NOTE: the default ``random_state`` is a single RandomState object created
    when the function is defined, so calls that rely on the default share it.
    """
    coef_value = 0.7
    return make_sparse_spd_matrix(n_features,
                                  alpha=alpha,
                                  smallest_coef=coef_value,
                                  largest_coef=coef_value,
                                  random_state=random_state)
Пример #9
0
    def inv_cov(self, low=0.3, upper=0.6, p=0.2, symmetric=True) -> np.ndarray:
        """Generate a sparse inverse covariance matrix for n_features

        Parameters
        ----------
        low : float, default = 0.3
            Lower bound of inverse covariance values between features.

        upper : float, default = 0.6
            Upper bound of inverse covariance values between features.

        p : float > 0, default = 0.2
            Probability of edge between nodes in random graph, ie inverse
            covariance matrix sparsity. ``1 - p`` is passed as the
            zero-probability ``alpha`` of make_sparse_spd_matrix.

        symmetric : bool, default = True
            Accepted but not used.

        Returns
        -------
        S : array (n_features, n_features)
            Randomly generated inverse covariance matrix.
        """
        # An integer seed is drawn from the instance generator so that the
        # result follows self.rng.
        rs = self.rng.integers(10000)
        # The size is passed positionally: newer scikit-learn releases
        # renamed the ``dim`` keyword to ``n_dim``, while the positional form
        # works with both.
        return make_sparse_spd_matrix(self.n_features,
                                      alpha=1 - p,
                                      smallest_coef=low,
                                      largest_coef=upper,
                                      random_state=rs)
def generate_random_sparse_psd(p, zero_entry_chance=0.75):
    """
    Draw a random sparse matrix with datasets.make_sparse_spd_matrix.
    :param p: The dimension.
    :param zero_entry_chance: Probability that a coefficient is zero
        (forwarded as ``alpha``).
    :return: The generated array.
    """
    sparse_spd = datasets.make_sparse_spd_matrix(p, alpha=zero_entry_chance)
    return sparse_spd
Пример #11
0
 def test_neighbourhood_selection_overall_cv(self):
     """
     Smoke test: fit NeighbourhoodSelectionCV on Gaussian samples whose
     precision matrix is sparse. Only checks that fitting runs.
     """
     p = 10
     n = 200
     # alpha is passed by keyword: it is keyword-only in scikit-learn >= 1.0.
     K = make_sparse_spd_matrix(p, alpha=0.7)
     C = np.linalg.inv(K)
     X = np.random.multivariate_normal(np.zeros(p), C, n)
     ns = nitk.NeighbourhoodSelectionCV()
     ns.fit(X)
Пример #12
0
    def test_sparse_inv_covariance(self, q, alpha_ratio):
        """Compare DRS and A2DR on a sparse inverse covariance estimation problem.

        Solves
            minimize -log(det(S)) + trace(S*Q) + \\alpha*||S||_1
            subject to S is symmetric PSD
        where Q is the sample covariance of data drawn from a Gaussian with a
        sparse precision matrix.

        q: dimension of the matrix.
        alpha_ratio: the l1 weight alpha as a fraction of the largest
            absolute off-diagonal entry of Q.
        """
        # Problem data.
        p = 1000  # Number of samples.
        ratio = 0.9  # Fraction of zeros in S.

        # alpha is passed by keyword: it is keyword-only in scikit-learn >= 1.0.
        S_true = sparse.csc_matrix(make_sparse_spd_matrix(q, alpha=ratio))
        Sigma = sparse.linalg.inv(S_true).todense()
        # Colour standard normal samples with the square root of Sigma.
        z_sample = sp.linalg.sqrtm(Sigma).dot(np.random.randn(q, p))
        Q = np.cov(z_sample)

        # Mask selecting the off-diagonal entries of Q.
        mask = np.ones(Q.shape, dtype=bool)
        np.fill_diagonal(mask, 0)
        alpha_max = np.max(np.abs(Q)[mask])
        alpha = alpha_ratio * alpha_max  # 0.001 for q = 100, 0.01 for q = 50

        # Convert problem to standard form.
        # f_1(S) = -log(det(S)) + trace(S*Q) on symmetric PSD matrices, f_2(S) = \alpha*||S||_1.
        # A_1 = I, A_2 = -I, b = 0.
        prox_list = [
            lambda v, t: prox_neg_log_det(
                v.reshape(
                    (q, q), order='C'), t, lin_term=t * Q).ravel(order='C'),
            lambda v, t: prox_norm1(v, t * alpha)
        ]
        A_list = [sparse.eye(q * q), -sparse.eye(q * q)]
        b = np.zeros(q * q)

        # Solve with DRS.
        drs_result = a2dr(prox_list,
                          A_list,
                          b,
                          anderson=False,
                          precond=True,
                          max_iter=self.MAX_ITER)
        #drs_result = a2dr(prox_list, A_list, b, anderson=True, precond=True, max_iter=self.MAX_ITER, ada_reg=False)
        #drs_result = a2dr(prox_list, A_list, b, anderson=True, precond=True, max_iter=self.MAX_ITER, ada_reg=False, lam_accel=0)
        #drs_result = a2dr(prox_list, A_list, b, anderson=True, precond=True, max_iter=self.MAX_ITER, ada_reg=False, lam_accel=1e-12)
        print('Finished DRS.')

        # Solve with A2DR.
        a2dr_result = a2dr(prox_list,
                           A_list,
                           b,
                           anderson=True,
                           precond=True,
                           max_iter=self.MAX_ITER)
        #a2dr_result = a2dr(prox_list, A_list, b, anderson=True, precond=True, max_iter=self.MAX_ITER, lam_accel=1e-12)
        # lam_accel = 0 seems to work well sometimes, although it oscillates a lot.
        # The last entry of "x_vals" is reshaped back into a q-by-q matrix.
        a2dr_S = a2dr_result["x_vals"][-1].reshape((q, q), order='C')
        self.compare_total(drs_result, a2dr_result)
        print('Finished A2DR.')
        # Fraction of non-zero entries in the recovered matrix.
        print('recovered sparsity = {}'.format(
            np.sum(a2dr_S != 0) * 1.0 / a2dr_S.shape[0]**2))
Пример #13
0
def add_noise(theta, p, alpha, threshold=0.1):
    """Perturb ``theta`` with a random sparse matrix and clip the result.

    A ``p``-by-``p`` noise matrix is drawn with make_sparse_spd_matrix using
    coefficient bounds ``-threshold`` and ``threshold``. The diagonal of
    ``theta`` is set to zero, the noise is added, and the sum is passed
    through ``cov_nearest(method="clipped", threshold=0.1)``.

    NOTE: ``theta`` is modified in place (its diagonal is zeroed), so the
    caller's array changes as well.

    Returns the matrix produced by ``cov_nearest``.
    """
    # The size is passed positionally: newer scikit-learn releases renamed
    # the ``dim`` keyword to ``n_dim``, while the positional form works with
    # both.
    noise_mat = make_sparse_spd_matrix(p,
                                       alpha=alpha,
                                       norm_diag=False,
                                       smallest_coef=-threshold,
                                       largest_coef=threshold)
    np.fill_diagonal(theta, 0.0)
    # The clipping threshold is the fixed value 0.1, independent of the
    # ``threshold`` argument used for the noise coefficients.
    theta_star = cov_nearest(noise_mat + theta,
                             method="clipped",
                             threshold=0.1)
    return theta_star
Пример #14
0
 def test_neighbourhood_selection(self):
     """
     Create a sparse precision matrix, sample Gaussian data from it and fit
     NeighbourhoodSelection with penalty 0.5. Only checks that fitting runs.
     """
     p = 10
     n = 200
     l = 0.5
     # alpha is passed by keyword: it is keyword-only in scikit-learn >= 1.0.
     K = make_sparse_spd_matrix(p, alpha=0.7)
     C = np.linalg.inv(K)
     X = np.random.multivariate_normal(np.zeros(p), C, n)
     ns = nitk.NeighbourhoodSelection(l)
     ns.fit(X)
Пример #15
0
    def _gista(self, theta0, S, _lambdas, verbose=False):
        """
        G-ISTA algorithm

        https://papers.nips.cc/paper/4574-iterative-thresholding-algorithm-for-sparse-inverse-covariance-estimation.pdf

        Starting from ``theta0``, repeatedly applies ``self.nsfunc.prox`` to a
        gradient step on ``self.sfunc`` for at most ``self.max_iters``
        iterations and returns the last iterate.

        theta0: starting matrix.
        S: passed to the smooth function, its gradient, the objective and
            the duality gap (presumably the sample covariance - confirm).
        _lambdas: passed to the prox, the objective and the duality gap
            (presumably the penalty weights - confirm).
        verbose: if True, print the initial function values and the duality
            gap at every iteration.
        """

        theta = theta0
        # Initial step size: square of the smallest eigenvalue of theta0.
        t = min(np.linalg.eigvals(theta0))**2
        p = len(theta)

        if verbose:
            print(f'f(X,S) = {self.sfunc.eval(theta0, S)}')
            print(f'g(X,rho) = {self.nsfunc.eval(theta0, _lambdas)}')
            print(
                f'Initial Objective: {self._pgm_objective(theta0, S, _lambdas)}'
            )

        # NOTE(review): this check reads self.theta0 rather than the theta0
        # argument used everywhere else - confirm this is intended.
        if self._pgm_objective(self.theta0, S, _lambdas) > 10000:
            # Skip, bad starting point
            theta = make_sparse_spd_matrix(p,
                                           alpha=0.5,
                                           norm_diag=False,
                                           smallest_coef=-1.0,
                                           largest_coef=1.0)

        for i in range(self.max_iters):
            # Replace the iterate by a clipped version when it is not
            # positive definite.
            if not _is_pos_def(theta):
                print('Clipped Precision matrix')
                theta = cov_nearest(theta, method="clipped", threshold=0.1)

            if self.ss_type == 'backtracking':
                t = self._step_size(theta, S, _lambdas, t)

            delta = self._duality_gap(p, theta, S, _lambdas)

            if verbose:
                print(f'Duality Gap: {delta}.')

            # Stop early only when the duality-gap criterion is enabled.
            if delta < self.epsilon and self.dual_gap:
                print(f'iterations: {i}')
                print(f'Duality Gap: {delta} < {self.epsilon}. Exiting.')
                break

            # Proximal gradient step.
            theta_k1 = self.nsfunc.prox(
                theta - t * self.sfunc.gradient(theta, S), _lambdas)

            # Starting step size for the next iteration's backtracking.
            if self.ss_type == 'backtracking':
                t = _set_next_inital_step_size(theta_k1, theta)

            theta = theta_k1

        return theta
Пример #16
0
def makeDataset(n_dimensions, n_total, random_states=(None, None)):
    '''
    Generate a n_dimensions-D dataset by sampling from two Gaussian of fixed properties.
    Inputs: n_dimension   = number of dimensions
            n_total       = total number of events to generate
            random_states = sequence of two numpy.random.RandomState objects, or
                            integers to seed internal RandomState objects;
                            entry 0 is used for the first Gaussian and
                            entry 1 for the second
    Output: array containing generated n_dimensional-D data; the first
            int(0.667 * n_total) rows come from the first Gaussian and the
            following int(0.333 * n_total) rows from the second
    '''
    # Create the covariance matrices for the two component Gaussians
    # random_states are specified for reproducibility
    # The size is passed positionally: newer scikit-learn releases renamed
    # the ``dim`` keyword to ``n_dim``, while the positional form works with
    # both.
    cov1 = make_sparse_spd_matrix(
        n_dimensions,
        alpha=0.1,
        random_state=47,
        norm_diag=True,
    )
    # alpha is a probability, so the earlier value of -0.5 was out of range.
    # 0.0 keeps the intent (no coefficient is forced to zero) while staying
    # inside [0, 1].
    cov2 = make_sparse_spd_matrix(
        n_dimensions,
        alpha=0.0,
        random_state=1701,
        norm_diag=True,
    )
    # Create mean position of first Gaussian.
    # NOTE: this reseeds the global NumPy generator.
    np.random.seed(52)
    mu1 = np.random.rand(1, n_dimensions)[0]

    # Create data from first Gaussian component
    X1 = stats.multivariate_normal.rvs(
        mean=mu1,
        cov=cov1,
        size=int(0.667 * n_total),
        random_state=random_states[0],
    )
    # Second Gaussian mean is fixed to be shifted by -1 from that of first
    X2 = stats.multivariate_normal.rvs(mean=mu1 - 1.,
                                       cov=cov2,
                                       size=int(0.333 * n_total),
                                       random_state=random_states[1])
    return np.append(X1, X2, axis=0)
Пример #17
0
def main():
    """Generate datasets for every alpha, evaluate all models, write CSV results.

    Writes the first row of ``beta`` to ``coef_original_beta.csv``, dumps
    every 20th generated sigma matrix to a text file and finally writes the
    train/test accuracies of the lasso, dlda, svm and tc models to
    ``accuracy_comparison.csv`` (all under ``OUTPUT_DIR``).
    """
    with open(OUTPUT_DIR + 'coef_original_beta.csv', 'w') as beta_file:
        beta_file.write(','.join(str(coef) for coef in beta[0]))

    # generate dataset
    X_in_all_alpha = {}
    y_in_all_alpha = {}
    for a in all_alpha:
        designs = []
        targets = []
        for example_no in range(1, N + 1):
            # Generate a sparse symmetric definite positive matrix.
            sigma = make_sparse_spd_matrix(p,
                                           alpha=a,
                                           smallest_coef=-1,
                                           largest_coef=1,
                                           norm_diag=False)
            if example_no % 20 == 0:
                sigma_path = (
                    OUTPUT_DIR +
                    'sigma_alpha={}_{}th-example.txt'.format(a, example_no))
                sigma.tofile(sigma_path, sep=",", format="%s")
            X, y = generate_data(n, PARAMS['prob'], mu, sigma, beta)
            designs.append(X)
            targets.append(y)
        X_in_all_alpha[a] = designs
        y_in_all_alpha[a] = targets

    result = {}
    for m in models:
        train_acc, test_acc = evaluation(all_alpha,
                                         X_in_all_alpha,
                                         y_in_all_alpha,
                                         N,
                                         p,
                                         n,
                                         model=m)
        result[m] = (train_acc, test_acc)

    header = [
        'alpha', 'lasso-train', 'lasso-test', 'dlda-train', 'dlda-test',
        'svm-train', 'svm-test', 'tc-train', 'tc-test'
    ]
    with open(OUTPUT_DIR + 'accuracy_comparison.csv', 'w') as fout:
        fout.write(','.join(header))
        fout.write('\n')
        for i, a in enumerate(all_alpha):
            # One row per alpha: train then test accuracy for each model.
            row = [a]
            for model_name in ('lasso', 'dlda', 'svm', 'tc'):
                row.append(result[model_name][0][i])
                row.append(result[model_name][1][i])
            fout.write(','.join(map(str, row)))
            fout.write('\n')
Пример #18
0
def sample_data():
    """Draw a random non-negative sparse matrix and a random integer.

    Returns
    -------
    X : the element-wise absolute value of a ``make_sparse_spd_matrix`` draw
        of random size ``n_samples`` (between 100 and 29999) with sparsity
        ``alpha`` drawn from ``beta(10, 1)``.
    k : random integer in [5, 50).

    NOTE: X is a dense ``n_samples`` x ``n_samples`` array, so the upper end
    of the size range needs several gigabytes of memory.
    """
    n_samples = np.random.randint(100, 30000)
    # This value is not used, but the draw is kept so that the global random
    # stream advances exactly as before.
    np.random.choice(np.linspace(0.1, 3))
    alpha = beta(10, 1).rvs()
    # The size is passed positionally: newer scikit-learn releases renamed
    # the ``dim`` keyword to ``n_dim``, while the positional form works with
    # both.
    X = np.abs(
        make_sparse_spd_matrix(n_samples,
                               alpha=alpha,
                               norm_diag=False,
                               smallest_coef=0.1,
                               largest_coef=0.7))
    k = np.random.randint(5, 50)
    return X, k
Пример #19
0
 def test_correlation_permutation(self):
     """
     Generates a distribution with a sparse covariance matrix and fits the
     correlation permuter on samples from it. The true and estimated
     matrices are thresholded to binary form; no assertion is made on them.
     """
     p = 10
     n = 200
     # alpha is passed by keyword: it is keyword-only in scikit-learn >= 1.0.
     C = make_sparse_spd_matrix(p, alpha=0.7)
     X = np.random.multivariate_normal(np.zeros(p), C, n)
     corr_model = correlation_permuter.CorrelationPermutationNetwork()
     corr_model.fit(X)
     corr = corr_model.correlation_
     C = methods.threshold_matrix(C, 0.001, binary=True)
     corr = methods.threshold_matrix(corr, 0.001, binary=True)
Пример #20
0
def _new_graph(n_features, alpha):
    """Draw a sparse precision matrix and return ``(cov, prec)`` rescaled.

    The precision matrix comes from make_sparse_spd_matrix with both
    coefficient bounds equal to 0.7 and the module-level ``prng`` as random
    state. Its inverse is scaled to unit diagonal and the precision matrix
    is scaled by the reciprocal factors.
    """
    global prng
    coef_value = 0.7
    prec = make_sparse_spd_matrix(n_features,
                                  alpha=alpha,  # prob that a coeff is zero
                                  smallest_coef=coef_value,
                                  largest_coef=coef_value,
                                  random_state=prng)
    cov = np.linalg.inv(prec)
    col_scale = np.sqrt(np.diag(cov))
    row_scale = col_scale[:, np.newaxis]
    # Divide columns, then rows, by the standard deviations.
    cov /= col_scale
    cov /= row_scale
    # Apply the reciprocal scaling to the precision matrix.
    prec *= col_scale
    prec *= row_scale
    return cov, prec
Пример #21
0
 def test_scio_bic_columnwise(self):
     """
     Generates a distribution with a sparse precision matrix and fits SCIO
     using BIC over each column. The true and estimated precision matrices
     are thresholded to binary form; no assertion is made on them.
     """
     p = 10
     n = 200
     # alpha is passed by keyword: it is keyword-only in scikit-learn >= 1.0.
     K = make_sparse_spd_matrix(p, alpha=0.7)
     C = np.linalg.inv(K)
     X = np.random.multivariate_normal(np.zeros(p), C, n)
     sc = SCIOColumnBIC()
     sc.fit(X)
     K = methods.threshold_matrix(K, 0.001, binary=True)
     prec_ = methods.threshold_matrix(sc.precision_, 0.001, binary=True)
Пример #22
0
 def test_clime_cv(self):
     """
     Sees how CLIME performs when we use cross validation to select lambda.
     Prints the R reference estimate (computed with lambda 0.5) and the
     CLIMECV estimate; no assertion is made.
     """
     p = 50
     n = 10
     # alpha is passed by keyword: it is keyword-only in scikit-learn >= 1.0.
     K = make_sparse_spd_matrix(p, alpha=0.7)
     C = np.linalg.inv(K)
     X = np.random.multivariate_normal(np.zeros(p), C, n)
     l = 0.5
     r_prec = self._estimate_precision_matrix_using_r(X, l)
     print(r_prec)
     cl = CLIMECV(True)
     cl.fit(X)
     print(cl.precision_)
Пример #23
0
def make_data(n_samples, n_features):
    """Sample standardised Gaussian data from a sparse precision matrix.

    A precision matrix is drawn with a RandomState seeded with 1; its inverse
    is scaled to unit diagonal (and the precision matrix by the reciprocal
    factors). ``n_samples`` points are then drawn from the zero-mean Gaussian
    with that covariance, and each column is centred and divided by its
    standard deviation.

    Returns ``(X, cov, prec)``.
    """
    rng = np.random.RandomState(1)
    prec = make_sparse_spd_matrix(n_features,
                                  alpha=.98,
                                  smallest_coef=.4,
                                  largest_coef=.7,
                                  random_state=rng)
    cov = np.linalg.inv(prec)
    col_scale = np.sqrt(np.diag(cov))
    row_scale = col_scale[:, np.newaxis]
    cov /= col_scale
    cov /= row_scale
    prec *= col_scale
    prec *= row_scale

    zero_mean = np.zeros(n_features)
    X = rng.multivariate_normal(zero_mean, cov, size=n_samples)
    X -= X.mean(axis=0)
    X /= X.std(axis=0)
    return X, cov, prec
Пример #24
0
    def test_scaled_lasso_precision_network(self):
        """
        We test our implementation of the scaled lasso
        based precision matrix estimation against that of the authors.
        This sometimes fails; as long as the tolerance is low that's ok.
        The two estimates are compared to one decimal place.
        """
        p = 10
        n = 200
        # alpha is passed by keyword: it is keyword-only in scikit-learn >= 1.0.
        K = make_sparse_spd_matrix(p, alpha=0.7)
        C = np.linalg.inv(K)
        X = np.random.multivariate_normal(np.zeros(p), C, n)
        sli = scaled_lasso.ScaledLassoInference()
        sli.fit(X)
        prec_r = self._estimate_precision_matrix_using_r(X)

        assert_array_almost_equal(prec_r, sli.precision_, decimal=1)
def test_graphical_lasso_cv(random_state=1):
    """Smoke-test GraphicalLassoCV in verbose mode on a tiny Gaussian sample."""
    n_features, n_obs = 5, 6
    rng = check_random_state(random_state)

    # Sample data from a sparse multivariate normal
    precision = make_sparse_spd_matrix(n_features, alpha=0.96, random_state=rng)
    covariance = linalg.inv(precision)
    X = rng.multivariate_normal(np.zeros(n_features), covariance, size=n_obs)

    # Capture stdout, to smoke test the verbose mode
    saved_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        # We need verbose very high so that Parallel prints on stdout
        estimator = GraphicalLassoCV(verbose=100, alphas=5, tol=1e-1)
        estimator.fit(X)
    finally:
        sys.stdout = saved_stdout
Пример #26
0
    def test_scio_with_diag_penalty(self):
        """
        Generates a distribution with a sparse precision matrix, fits SCIO
        with a penalised diagonal and compares the estimate with the R
        reference implementation to four decimal places.
        """
        p = 10
        n = 200
        # alpha is passed by keyword: it is keyword-only in scikit-learn >= 1.0.
        K = make_sparse_spd_matrix(p, alpha=0.7)
        C = np.linalg.inv(K)
        X = np.random.multivariate_normal(np.zeros(p), C, n)
        l = 0.5
        sc = SCIO(l, penalize_diag=True)
        sc.fit(X)

        r_prec = self._estimate_precision_matrix_using_r(X, l, True)
        assert_array_almost_equal(r_prec, sc.precision_, decimal=4)
Пример #27
0
def get_synthetic_data(n_samples,
                       n_features,
                       precision_matrix=None,
                       alpha=0.98,
                       seed=1):
    """
    Generate synthetic data using a covariance matrix obtained by inverting
    a precision matrix (randomly generated unless one is supplied).

    The covariance is scaled to unit diagonal and the precision matrix is
    scaled by the reciprocal factors. Samples are drawn from the zero-mean
    Gaussian with that covariance, then every column is centred and divided
    by its standard deviation.

    Args:
        n_samples (int): number of rows to draw.
        n_features (int): number of columns; must match the size of
            ``precision_matrix`` when one is given.
        precision_matrix (array-like, optional): precision matrix to use
            instead of a random one. It is copied, never modified.
            Defaults to None.
        alpha (float, optional): probability that a coefficient of the
            random precision matrix is zero. Only used when
            ``precision_matrix`` is None. Defaults to 0.98.
        seed (int, optional): seed of the RandomState used for the random
            precision matrix and for sampling. Defaults to 1.

    Returns:
        tuple: a tuple with two elements. The first is a pd.DataFrame
            representing the data, with columns named ``gene<i>`` and rows
            named ``sample<i>``. The second is the (rescaled) precision
            matrix used to generate the data.
    """
    prng = np.random.RandomState(seed)
    if precision_matrix is None:
        prec = make_sparse_spd_matrix(n_features,
                                      alpha=alpha,
                                      smallest_coef=.1,
                                      largest_coef=.9,
                                      random_state=prng)
    else:
        # Work on a float copy: the in-place rescaling below must not alter
        # the caller's matrix, and would fail on an integer array.
        prec = np.array(precision_matrix, dtype=float)
    cov = linalg.inv(prec)
    d = np.sqrt(np.diag(cov))
    # Divide columns, then rows, by the standard deviations.
    cov /= d
    cov /= d[:, np.newaxis]
    # Apply the reciprocal scaling to the precision matrix.
    prec *= d
    prec *= d[:, np.newaxis]
    X = prng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)
    X -= X.mean(axis=0)
    X /= X.std(axis=0)

    X = pd.DataFrame(X)

    X.columns = ["gene" + str(i) for i in X.columns]
    X.index = ["sample" + str(i) for i in X.index]

    return X, prec
Пример #28
0
def generate_latent_network(n_obs=100,
                            n_lat=10,
                            n_samples=500,
                            sparsity_obs=0.3,
                            sparsity_lat=0.7,
                            sparsityinter=0.3,
                            random_state=None):
    """Generate a precision matrix with latent variables and samples from it.

    A sparse SPD matrix of size ``n_obs + n_lat`` is drawn and split into a
    latent block (first ``n_lat`` rows/columns), an observed block (the
    remaining ones) and the cross block between them. The observed precision
    is the observed block minus the latent contribution
    ``(0.3 K) H^-1 (0.3 K)^T``.

    Parameters
    ----------
    n_obs : int
        Number of observed variables.
    n_lat : int
        Number of latent variables.
    n_samples : int
        Number of samples to draw.
    sparsity_obs : float
        Passed as ``alpha`` (probability that a coefficient is zero) to
        make_sparse_spd_matrix.
    sparsity_lat, sparsityinter : float
        Accepted but not used.
    random_state : int, RandomState instance or None
        Random state for the sparse matrix only; the samples are drawn from
        the global ``np.random`` generator.

    Returns
    -------
    T_obs, T_true, H_true, K_true, samples
        The observed precision, the observed block, the latent block, the
        cross block and the drawn samples.

    Whether the latent contribution is positive semi-definite is printed.
    """
    # The size is passed positionally: newer scikit-learn releases renamed
    # the ``dim`` keyword to ``n_dim``, while the positional form works with
    # both.
    A = make_sparse_spd_matrix(n_obs + n_lat,
                               alpha=sparsity_obs,
                               random_state=random_state)

    T_true = A[n_lat:, n_lat:]
    K_true = A[n_lat:, :n_lat]
    H_true = A[0:n_lat, 0:n_lat]
    per_cov = K_true * 0.3
    # Latent contribution, computed once and reused for the check below.
    latent_term = per_cov.dot(np.linalg.inv(H_true)).dot(per_cov.T)
    T_obs = T_true - latent_term
    print(is_pos_semi_def(latent_term))
    samples = np.random.multivariate_normal(np.zeros(n_obs),
                                            np.linalg.inv(T_obs), n_samples)

    return T_obs, T_true, H_true, K_true, samples
Пример #29
0
    def test_clime(self):
        """
        Generates a distribution with a sparse precision matrix, fits CLIME
        with lambda 0.5 and compares the estimate with the R reference
        implementation to two decimal places.
        """
        p = 50
        n = 10
        # alpha is passed by keyword: it is keyword-only in scikit-learn >= 1.0.
        K = make_sparse_spd_matrix(p, alpha=0.7)
        C = np.linalg.inv(K)
        X = np.random.multivariate_normal(np.zeros(p), C, n)
        l = 0.5
        r_prec = self._estimate_precision_matrix_using_r(X, l)
        print(r_prec)
        cl = CLIME(l, True)
        cl.fit(X)

        assert_array_almost_equal(r_prec, cl.precision_, decimal=2)
Пример #30
0
    def para_gen(n_area):
        """Draw a random n_area-by-n_area matrix of one of three kinds.

        The kind is fixed by ``which_data`` from the enclosing scope (1, 2 or
        3) or chosen at random for any other value:

        * kind 1: the negative of a make_sparse_spd_matrix draw;
        * kind 2: a random matrix, shifted, rescaled and sparsified, redrawn
          until the largest real part of its eigenvalues is <= 0;
        * kind 3: a random sparse antisymmetric matrix scaled so that its
          largest absolute entry equals ``abs(lower_b)``.

        ``scale`` is also read from the enclosing scope.
        """
        sparsity = np.random.uniform(0.1, 0.7)
        lower_b = np.random.uniform(-0.1, 0.1) * scale
        # NOTE(review): lower_b is already multiplied by scale when it is
        # used as the lower limit here, and the result is scaled again -
        # confirm this is intended.
        upper_b = np.random.uniform(lower_b, 0.1) * scale

        # Map which_data onto the middle of one of the three thirds of [0, 1].
        if which_data == 1:
            which_kind = 1.0 / 6
        elif which_data == 2:
            which_kind = 1.0 / 2
        elif which_data == 3:
            which_kind = 5.0 / 6
        else:
            which_kind = np.random.rand()

        def rand_1(n):
            # Value sampler for sp.random: uniform on [-1, 1).
            return np.random.uniform(-1, 1, n)

        if which_kind < 1.0 / 3:
            # alpha is passed by keyword: it is keyword-only in
            # scikit-learn >= 1.0.
            tmp = -make_sparse_spd_matrix(n_area,
                                          alpha=1 - sparsity,
                                          smallest_coef=lower_b,
                                          largest_coef=upper_b)
        elif which_kind < 2.0 / 3:
            while True:
                # The bounds are redrawn (unscaled) on every attempt.
                lower_b = np.random.uniform(-0.1, 0.1)
                upper_b = np.random.uniform(lower_b, 0.1)
                tmp = np.random.uniform(-1, 1, (n_area, n_area))

                # Shift by the largest real eigenvalue part.
                tmp = tmp - np.sort(np.real(
                    LA.eig(tmp)[0]))[-1] * np.eye(n_area)
                # Affinely map the entries from [fmin, fmax] onto
                # [lower_b, upper_b].
                fmax = np.amax(tmp)
                fmin = np.amin(tmp)
                tmp = (upper_b - lower_b) / (fmax - fmin) * tmp + (
                    lower_b * fmax - upper_b * fmin) / (fmax - fmin)
                # Keep only the entries selected by a random sparse pattern.
                tmp = (abs(sp.random(n_area, n_area, density=sparsity).A) >
                       0) * tmp
                # Accept once no eigenvalue has a positive real part.
                if np.sort(np.real(LA.eig(tmp)[0]))[-1] <= 0:
                    break
        else:
            #tmp = np.random.uniform(-1, 1, (n_area,n_area))
            tmp = sp.random(n_area, n_area, density=sparsity,
                            data_rvs=rand_1).A
            # Antisymmetric part, rescaled to a maximum magnitude of
            # abs(lower_b).
            tmp = (tmp - tmp.T) / 2
            tmp = abs(lower_b) / np.amax(abs(tmp)) * tmp
        return tmp
Пример #31
0
def instance(n, p, alpha, rho):
    """Generate Gaussian data from a sparse precision matrix.

    The precision matrix is drawn with both coefficient bounds equal to
    ``rho`` and a normalised diagonal. Its inverse is scaled to unit diagonal
    (and the precision matrix by the reciprocal factors), ``n`` samples are
    drawn from the zero-mean Gaussian with that covariance and divided by
    ``sqrt(n)``.

    Returns ``(X, prec, nonzero)`` where ``nonzero`` holds the positions,
    within the flattened off-diagonal entries of the unscaled precision
    matrix, of the non-zero ones.
    """
    # Generate the data
    prec = make_sparse_spd_matrix(p,
                                  alpha=alpha,
                                  smallest_coef=rho,
                                  largest_coef=rho,
                                  norm_diag=True)
    off_diag_mask = ~np.identity(p, dtype=bool)
    nonzero = np.flatnonzero(prec[off_diag_mask] != 0)

    cov = np.linalg.inv(prec)
    col_scale = np.sqrt(np.diag(cov))
    row_scale = col_scale[:, np.newaxis]
    cov /= col_scale
    cov /= row_scale
    prec *= col_scale
    prec *= row_scale

    X = np.random.multivariate_normal(np.zeros(p), cov, size=n)
    X /= np.sqrt(n)

    return X, prec, nonzero
Пример #32
0
# Copyright: INRIA

import numpy as np
from scipy import linalg
from sklearn.datasets import make_sparse_spd_matrix
from sklearn.covariance import GraphLassoCV, ledoit_wolf
import pylab as pl

##############################################################################
# Generate the data
n_samples = 60
n_features = 20

# One seeded generator drives both the precision matrix and the sampling,
# so the script is reproducible.
prng = np.random.RandomState(1)
prec = make_sparse_spd_matrix(n_features, alpha=.98,
                              smallest_coef=.4,
                              largest_coef=.7,
                              random_state=prng)
cov = linalg.inv(prec)
# Scale the covariance to unit diagonal (columns first, then rows) and apply
# the reciprocal scaling to the precision matrix.
d = np.sqrt(np.diag(cov))
cov /= d
cov /= d[:, np.newaxis]
prec *= d
prec *= d[:, np.newaxis]
X = prng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)
# Standardise every column: zero mean, unit standard deviation.
X -= X.mean(axis=0)
X /= X.std(axis=0)

##############################################################################
# Estimate the covariance
# Empirical covariance of the centred data.
emp_cov = np.dot(X.T, X) / n_samples
from optparse import OptionParser, Option
import numpy as np
from sklearn.datasets import make_sparse_spd_matrix

# Command-line options: problem size, sparsity of the two precision matrices
# and the number of data points to draw for each data set.
parser = OptionParser()
parser.add_option("-n", "--nodes", dest="nodes", type="int", default=10, help="Number of nodes")
parser.add_option("-s", "--bkgrnd_sparsity", dest="bkgrnd_sparsity", type="float", default=0.95, help="Sparsity of generated precision matrix")
parser.add_option("-d", "--delta_sparsity", dest="delta_sparsity", type="float", default=0.95, help="Sparsity of generated delta precision matrix")
parser.add_option("-b", "--bkgrnd_datapoints", dest="bkgrnd_datapoints", type="int", default=100000, help="Number of background datapoints")
parser.add_option("-f", "--foregrnd_datapoints", dest="foregrnd_datapoints", type="int", default=100000, help="Number of foreground datapoints")
(options, args) = parser.parse_args()

# Zero mean vector with one entry per node.
mean = [0.0 for n in range(options.nodes)]
# Background and delta precision matrices use fixed seeds (1 and 100).
bkgrnd_prec = make_sparse_spd_matrix(options.nodes, alpha=options.bkgrnd_sparsity, smallest_coef=0.5, largest_coef=0.9, random_state=np.random.RandomState(1), norm_diag=True)
delta_prec = make_sparse_spd_matrix(options.nodes, alpha=options.delta_sparsity, smallest_coef=0.5, largest_coef=0.9, random_state=np.random.RandomState(100), norm_diag=True)
# The foreground precision is the background precision plus the delta.
foregrnd_prec = bkgrnd_prec + delta_prec
bkgrnd_cov = np.linalg.inv(bkgrnd_prec)
foregrnd_cov = np.linalg.inv(foregrnd_prec)
# The samples come from the global np.random generator, which is not seeded
# in this part of the script.
bkgrnd_data = np.random.multivariate_normal(mean,bkgrnd_cov,options.bkgrnd_datapoints)
foregrnd_data = np.random.multivariate_normal(mean,foregrnd_cov,options.foregrnd_datapoints)

# Write all parameters and both data sets as CSV files in the working directory.
np.savetxt('mean.csv', mean, delimiter=',')
np.savetxt('bkgrnd_prec.csv', bkgrnd_prec, delimiter=',')
np.savetxt('delta_prec.csv', delta_prec, delimiter=',')
np.savetxt('foregrnd_prec.csv', foregrnd_prec, delimiter=',')
np.savetxt('bkgrnd_data.csv', bkgrnd_data, delimiter=',')
np.savetxt('foregrnd_data.csv', foregrnd_data, delimiter=',')