def test_graphical_lasso_iris():
    # Hard-coded solution from R glasso package for alpha=1.0
    # (need to set penalize.diagonal to FALSE)
    cov_R = np.array(
        [
            [0.68112222, 0.0000000, 0.265820, 0.02464314],
            [0.00000000, 0.1887129, 0.000000, 0.00000000],
            [0.26582000, 0.0000000, 3.095503, 0.28697200],
            [0.02464314, 0.0000000, 0.286972, 0.57713289],
        ]
    )
    icov_R = np.array(
        [
            [1.5190747, 0.000000, -0.1304475, 0.0000000],
            [0.0000000, 5.299055, 0.0000000, 0.0000000],
            [-0.1304475, 0.000000, 0.3498624, -0.1683946],
            [0.0000000, 0.000000, -0.1683946, 1.8164353],
        ]
    )
    X = datasets.load_iris().data
    emp_cov = empirical_covariance(X)
    for method in ("cd", "lars"):
        cov, icov = graphical_lasso(emp_cov, alpha=1.0, return_costs=False, mode=method)
        assert_array_almost_equal(cov, cov_R)
        assert_array_almost_equal(icov, icov_R)
def test_graphical_lasso_iris_singular():
    # Small subset of rows to test the rank-deficient case
    # Need to choose samples such that none of the variances are zero
    indices = np.arange(10, 13)

    # Hard-coded solution from R glasso package for alpha=0.01
    cov_R = np.array([
        [0.08, 0.056666662595, 0.00229729713223, 0.00153153142149],
        [0.056666662595, 0.082222222222, 0.00333333333333, 0.00222222222222],
        [0.002297297132, 0.003333333333, 0.00666666666667, 0.00009009009009],
        [0.001531531421, 0.002222222222, 0.00009009009009, 0.00222222222222]
    ])
    icov_R = np.array([
        [24.42244057, -16.831679593, 0.0, 0.0],
        [-16.83168201, 24.351841681, -6.206896552, -12.5],
        [0.0, -6.206896171, 153.103448276, 0.0],
        [0.0, -12.499999143, 0.0, 462.5]
    ])
    X = datasets.load_iris().data[indices, :]
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graphical_lasso(emp_cov, alpha=0.01, return_costs=False,
                                    mode=method)
        assert_array_almost_equal(cov, cov_R, decimal=5)
        assert_array_almost_equal(icov, icov_R, decimal=5)
def graphical_lasso_wrap(emp_cov, alpha, max_iter):
    """Run graphical lasso, retrying with a 10% larger alpha on failure."""
    try:
        _, precision = graphical_lasso(emp_cov[0], alpha=alpha, max_iter=max_iter)
        return precision
    except FloatingPointError:
        # Non-convergence raises FloatingPointError; increase the penalty and
        # retry (note: recursion is unbounded if no alpha ever converges).
        return graphical_lasso_wrap(emp_cov, alpha=alpha * 1.1, max_iter=max_iter)
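A minimal usage sketch for the wrapper above (assuming it sits in the same module); it indexes emp_cov[0], so the covariance must be passed inside a sequence.

import numpy as np
from sklearn.covariance import empirical_covariance, graphical_lasso

rng = np.random.RandomState(0)
X = rng.randn(50, 5)
emp_cov = empirical_covariance(X)

# Pass the covariance inside a 1-tuple because the wrapper indexes emp_cov[0];
# alpha grows by 10% on each FloatingPointError until the solver converges.
precision = graphical_lasso_wrap((emp_cov,), alpha=0.1, max_iter=100)
print(precision.shape)  # (5, 5)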
Example #5
 def fit_qda(self, data, lbls, idx):
     self.di_moments = dict(
         zip(self.uy, [{
             'mu': np.repeat(0, self.p).astype(float),
             'Sigma': np.zeros([self.p, self.p]),
             'iSigma': np.zeros([self.p, self.p])
         } for z in self.uy]))
     for yy in self.uy:
         self.di_moments[yy]['n'] = self.ny[yy]
         for ii in idx[yy]:
             x_ii = np.c_[self.enc.cenc.transform(data.iloc[ii,
                                                            self.enc.cidx]),
                          self.enc.nenc.transform(data.iloc[ii,
                                                            self.enc.nidx])]
             self.di_moments[yy]['mu'] += x_ii.sum(axis=0)
             self.di_moments[yy]['Sigma'] += x_ii.T.dot(x_ii)
         # Adjust raw numbers
         self.di_moments[yy]['mu'] = self.di_moments[yy]['mu'].reshape(
             [self.p, 1]) / self.ny[yy]
         self.di_moments[yy]['Sigma'] = (
             self.di_moments[yy]['Sigma'] - self.ny[yy] *
             self.di_moments[yy]['mu'].dot(self.di_moments[yy]['mu'].T)) / (
                 self.ny[yy] - 1)
         #self.di_moments[yy]['ldet'] = np.log(np.linalg.det(self.di_moments[yy]['Sigma']))
         #self.di_moments[yy]['iSigma'] = np.linalg.pinv(self.di_moments[yy]['Sigma'])
         # graphical_lasso returns (covariance, precision); keep the precision
         _, self.di_moments[yy]['iSigma'] = graphical_lasso(
             emp_cov=self.di_moments[yy]['Sigma'], alpha=0.001)
def test_graph_lasso_2D():
    # Hard-coded solution from Python skggm package
    # obtained by calling `quic(emp_cov, lam=.1, tol=1e-8)`
    cov_skggm = np.array([[3.09550269, 1.186972], [1.186972, 0.57713289]])

    icov_skggm = np.array([[1.52836773, -3.14334831], [-3.14334831, 8.19753385]])
    X = datasets.load_iris().data[:, 2:]
    emp_cov = empirical_covariance(X)
    for method in ("cd", "lars"):
        cov, icov = graphical_lasso(emp_cov, alpha=0.1, return_costs=False, mode=method)
        assert_array_almost_equal(cov, cov_skggm)
        assert_array_almost_equal(icov, icov_skggm)
Example #7
    def fit(self, X, W=None):
        '''
        X: data matrix, (n x d)
        each row corresponds to a single instance.
        Must be shifted to zero already.

        W: connectivity graph, (n x n)
        +1 for positive pairs, -1 for negative.
        '''
        print('SDML.fit ...', np.shape(X))
        self.mean_ = np.mean(X, axis=0)
        X = np.asarray(X - self.mean_)
        # set up prior M
        if self.use_cov:
            M = np.cov(X.T)
        else:
            M = np.identity(X.shape[1])
        if W is None:
            # default: fully connected graph over the n instances
            W = np.ones((X.shape[0], X.shape[0]))
        L = laplacian(W, normed=False)
        # SDML loss term: X^T L X, a (d x d) matrix
        loss_matrix = X.T.dot(L).dot(X)

        P = pinvh(M) + self.balance_param * loss_matrix
        emp_cov = pinvh(P)
        # hack: ensure positive semidefinite
        emp_cov = emp_cov.T.dot(emp_cov)
        M, _ = graphical_lasso(emp_cov,
                               self.sparsity_param,
                               verbose=self.verbose)
        self.M = M
        C = np.linalg.cholesky(self.M)
        self.dewhiten_ = C
        self.whiten_ = np.linalg.inv(C)
        # U: rotation matrix, S: scaling matrix
        #U, S, _ = scipy.linalg.svd(M)
        #s = np.sqrt(S.clip(self.EPS))
        #s_inv = np.diag(1./s)
        #s = np.diag(s)
        #self.whiten_ = np.dot(np.dot(U, s_inv), U.T)
        #self.dewhiten_ = np.dot(np.dot(U, s), U.T)
        #print 'M:', M
        print('SDML.fit done')
Example #9
    def update_omega(self, X, Y, W, b, N, K, sample_weight=None):

        """
        Update the conditional covariance matrix among responses.

        If sparsity is assumed, solve via graphical lasso (as implemented
        in scikit-learn). Note that this option sometimes triggers
        PSD-related warnings from the graphical lasso implementation.

        Parameters
        ----------
            X: numpy array
                N x D, features
            Y: numpy array
                N x K, responses
            W: numpy array
                D x K, coefficients of tasks
            b: numpy array
                K x 1, intercepts of tasks
            N: integer
                sample size
            K: integer
                dimension of tasks/responses
            sample_weight: numpy array
                N x 1, weight of each sample

        Returns
        -------
            omega: numpy array
                K x K, conditional covariance among responses
            omega_i: numpy array
                K x K, inverse of omega

        """
        _dif = Y - X @ W - b
        if sample_weight is None:
            sample_weight = np.ones(N)  # unweighted by default
        H = np.diag(sample_weight)

        if self.sparse_omega:
            # note: the sparse path uses the unweighted residual covariance
            omega, omega_i = graphical_lasso((_dif.T @ _dif) / N, self.lam2)
        else:
            omega = (_dif.T @ H @ _dif + self.lam2 * np.identity(K)) / np.sum(H)
            omega_i = np.linalg.inv(omega)

        return omega, omega_i
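A standalone sketch of the two updates implemented above, with toy shapes; lam2 stands in for the instance attribute self.lam2 and both branches are shown side by side.

import numpy as np
from sklearn.covariance import graphical_lasso

rng = np.random.RandomState(0)
N, D, K = 200, 5, 3
X, W, b = rng.randn(N, D), rng.randn(D, K), rng.randn(K)
Y = X @ W + b + 0.1 * rng.randn(N, K)
sample_weight = np.ones(N)
lam2 = 0.05  # stands in for self.lam2

_dif = Y - X @ W - b
H = np.diag(sample_weight)

# sparse path: graphical lasso on the (unweighted) residual covariance
omega_s, omega_i_s = graphical_lasso((_dif.T @ _dif) / N, lam2)
# dense path: ridge-regularized, weighted residual covariance
omega_d = (_dif.T @ H @ _dif + lam2 * np.identity(K)) / np.sum(H)
omega_i_d = np.linalg.inv(omega_d)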
Example #10
def test_graphical_lasso(random_state=0):
    # Sample data from a sparse multivariate normal
    dim = 20
    n_samples = 100
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.95, random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
    emp_cov = empirical_covariance(X)

    for alpha in (0., .1, .25):
        covs = dict()
        icovs = dict()
        for method in ('cd', 'lars'):
            cov_, icov_, costs = graphical_lasso(emp_cov,
                                                 return_costs=True,
                                                 alpha=alpha,
                                                 mode=method)
            covs[method] = cov_
            icovs[method] = icov_
            costs, dual_gap = np.array(costs).T
            # Check that the costs always decrease (doesn't hold if alpha == 0)
            if not alpha == 0:
                assert_array_less(np.diff(costs), 0)
        # Check that the 2 approaches give similar results
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=4)
        assert_array_almost_equal(icovs['cd'], icovs['lars'], decimal=4)

    # Smoke test the estimator
    model = GraphicalLasso(alpha=.25).fit(X)
    model.score(X)
    assert_array_almost_equal(model.covariance_, covs['cd'], decimal=4)
    assert_array_almost_equal(model.covariance_, covs['lars'], decimal=4)

    # For a centered matrix, assume_centered could be chosen True or False
    # Check that this returns indeed the same result for centered data
    Z = X - X.mean(0)
    precs = list()
    for assume_centered in (False, True):
        prec_ = GraphicalLasso(
            assume_centered=assume_centered).fit(Z).precision_
        precs.append(prec_)
    assert_array_almost_equal(precs[0], precs[1])
Пример #11
0
    def _graphical_lasso(self,
                         expected_value_resid_square,
                         alpha=None,
                         normalize_param=None,
                         Sigma_init=None):
        """
        Given Gamma, we estimate Omega, the graphical lasso solution for the precision matrix

        :param expected_value_resid_square:
        :param Sigma_init:
        :param normalize_param: number of rows in Y matrix. This is not self.data.n when we use CV.
        :return:
        """
        if not normalize_param:
            normalize_param = self.data.n

        if not alpha:
            alpha = self.alpha

        expected_value_resid_square *= (1 / normalize_param)
        expected_value_resid_square = np.array(expected_value_resid_square)

        # TODO: for now we do not do CV since the func that does this requires (Y-Z\Gamma),
        #  and not (Y-Z\Gamma)^T(Y-Z\Gamma). In theory, we can do SVD of the latter to return to (Y-Z\Gamma),
        #  but this is weird, because this is an expectation... If we think about it, in the CV func we will get
        #  (Y-Z\Gamma)^T(Y-Z\Gamma) again as the covariance, so it's probably OK.
        if Sigma_init is not None:
            Sigma_init = np.matrix(Sigma_init)

        mode = 'cd'
        if self.data.n < self.data.p:
            # We prefer the LARS solver for very sparse underlying graphs
            # TODO: move this so we don't need to check this every iteration
            mode = 'lars'

        Sigma, Omega = graphical_lasso(expected_value_resid_square,
                                       alpha=alpha,
                                       cov_init=Sigma_init,
                                       max_iter=self.glasso_max_iter,
                                       mode=mode)

        return Sigma, Omega
Example #14
def glasso_R(data, alphas, mode='cd'):
    """
        Estimates the graph with graphical lasso, based on its implementation in R.

        Parameters
        ----------
        data: numpy ndarray
            The input data to reconstruct/estimate a graph from. Features as columns and observations as rows.
        alphas: float
            Non-negative regularization parameter of the graphical lasso algorithm.

        Returns
        -------
        adjacency matrix : the estimated adjacency matrix.
    """
    scaler = StandardScaler()
    data = scaler.fit_transform(data)
    n_samples, _ = data.shape  # observations are rows
    cov_emp = np.dot(data.T, data) / n_samples
    covariance, precision_matrix = graphical_lasso(emp_cov=cov_emp, alpha=alphas, mode=mode)
    adjacency_matrix = precision_matrix.astype(bool).astype(int)
    adjacency_matrix[np.diag_indices_from(adjacency_matrix)] = 0
    return adjacency_matrix
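A quick, hedged usage sketch, assuming glasso_R is defined as above with numpy, StandardScaler and graphical_lasso imported in the same module.

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.covariance import graphical_lasso

rng = np.random.RandomState(0)
data = rng.randn(100, 6)        # 100 observations, 6 features
A = glasso_R(data, alphas=0.3)  # {0, 1} adjacency with a zeroed diagonal
print(A.sum() // 2, "edges")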
Example #15
def test_graphical_lasso_iris():
    # Hard-coded solution from R glasso package for alpha=1.0
    # The iris datasets in R and scikit-learn do not match in a few places,
    # these values are for the scikit-learn version.
    cov_R = np.array([
        [0.68112222, 0.0, 0.2651911, 0.02467558],
        [0.00, 0.1867507, 0.0, 0.00],
        [0.26519111, 0.0, 3.0924249, 0.28774489],
        [0.02467558, 0.0, 0.2877449, 0.57853156]
    ])
    icov_R = np.array([
        [1.5188780, 0.0, -0.1302515, 0.0],
        [0.0, 5.354733, 0.0, 0.0],
        [-0.1302515, 0.0, 0.3502322, -0.1686399],
        [0.0, 0.0, -0.1686399, 1.8123908]
    ])
    X = datasets.load_iris().data
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graphical_lasso(emp_cov, alpha=1.0, return_costs=False,
                                    mode=method)
        assert_array_almost_equal(cov, cov_R)
        assert_array_almost_equal(icov, icov_R)
Example #16
def ta_sparse_covariance(df: Typing.PatchedPandas,
                         convert_to='returns',
                         covariance='ewma',
                         cov_arg=0.97,
                         rho=0.1,
                         inverse=False,
                         **kwargs):
    from sklearn.covariance import graphical_lasso

    if covariance in ['ewma', 'weighted']:
        cov_func = ta_ewma_covariance
    elif covariance in ['rolling', 'moving']:
        cov_func = ta_moving_covariance
    elif covariance in ['garch', 'mgarch']:
        cov_func = ta_mgarch_covariance
    else:
        raise ValueError("unknown covariance, expected one of [ewma, moving, garch]")

    return \
        cov_func(df, cov_arg, convert_to) \
        .groupby(level=0) \
        .apply(lambda x: x if x.isnull().values.any() else \
                         _pd.DataFrame(graphical_lasso(x.values, rho, **kwargs)[int(inverse)], index=x.index, columns=x.columns))
Example #17
    def _fit(self, pairs, y):
        if not HAS_SKGGM:
            if self.verbose:
                print("SDML will use scikit-learn's graphical lasso solver.")
        else:
            if self.verbose:
                print("SDML will use skggm's graphical lasso solver.")
        pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples')

        # set up (the inverse of) the prior M
        if self.use_cov:
            X = np.vstack(
                {tuple(row)
                 for row in pairs.reshape(-1, pairs.shape[2])})
            prior_inv = np.atleast_2d(np.cov(X, rowvar=False))
        else:
            prior_inv = np.identity(pairs.shape[2])
        diff = pairs[:, 0] - pairs[:, 1]
        loss_matrix = (diff.T * y).dot(diff)
        emp_cov = prior_inv + self.balance_param * loss_matrix

        # our initialization will be the matrix with emp_cov's eigenvalues,
        # with a constant added so that they are all positive (plus an epsilon
        # to ensure definiteness). This is empirical.
        w, V = np.linalg.eigh(emp_cov)
        min_eigval = np.min(w)
        if min_eigval < 0.:
            warnings.warn(
                "Warning, the input matrix of graphical lasso is not "
                "positive semi-definite (PSD). The algorithm may diverge, "
                "and lead to degenerate solutions. "
                "To prevent that, try to decrease the balance parameter "
                "`balance_param` and/or to set use_cov=False.",
                ConvergenceWarning)
            w -= min_eigval  # we translate the eigenvalues to make them all positive
        w += 1e-10  # we add a small offset to avoid definiteness problems
        sigma0 = (V * w).dot(V.T)
        try:
            if HAS_SKGGM:
                theta0 = pinvh(sigma0)
                M, _, _, _, _, _ = quic(emp_cov,
                                        lam=self.sparsity_param,
                                        msg=self.verbose,
                                        Theta0=theta0,
                                        Sigma0=sigma0)
            else:
                _, M = graphical_lasso(emp_cov,
                                       alpha=self.sparsity_param,
                                       verbose=self.verbose,
                                       cov_init=sigma0)
            raised_error = None
            w_mahalanobis, _ = np.linalg.eigh(M)
            not_spd = any(w_mahalanobis < 0.)
            not_finite = not np.isfinite(M).all()
        except Exception as e:
            raised_error = e
            not_spd = False  # not_spd not applicable here so we set to False
            not_finite = False  # not_finite not applicable here so we set to False
        if raised_error is not None or not_spd or not_finite:
            msg = ("There was a problem in SDML when using {}'s graphical "
                   "lasso solver."
                   ).format("skggm" if HAS_SKGGM else "scikit-learn")
            if not HAS_SKGGM:
                skggm_advice = (
                    " skggm's graphical lasso can sometimes converge "
                    "on non SPD cases where scikit-learn's graphical "
                    "lasso fails to converge. Try to install skggm and "
                    "rerun the algorithm (see the README.md for the "
                    "right version of skggm).")
                msg += skggm_advice
            if raised_error is not None:
                msg += " The following error message was thrown: {}.".format(
                    raised_error)
            raise RuntimeError(msg)

        self.transformer_ = transformer_from_metric(np.atleast_2d(M))
        return self
Example #18
    def _fit(self, pairs, y):
        if self.use_cov != 'deprecated':
            warnings.warn(
                '"use_cov" parameter is not used.'
                ' It has been deprecated in version 0.5.0 and will be'
                ' removed in 0.6.0. Use "prior" instead.', DeprecationWarning)
        if not HAS_SKGGM:
            if self.verbose:
                print("SDML will use scikit-learn's graphical lasso solver.")
        else:
            if self.verbose:
                print("SDML will use skggm's graphical lasso solver.")
        pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples')

        # set up (the inverse of) the prior M
        # if the prior is the default (None), we raise a warning
        if self.prior is None:
            # TODO:
            #  replace prior=None by prior='identity' in v0.6.0 and remove the
            #  warning
            msg = (
                "Warning, no prior was set (`prior=None`). As of version 0.5.0, "
                "the default prior will now be set to "
                "'identity', instead of 'covariance'. If you still want to use "
                "the inverse of the covariance matrix as a prior, "
                "set prior='covariance'. This warning will disappear in "
                "v0.6.0, and `prior` parameter's default value will be set to "
                "'identity'.")
            warnings.warn(msg, ChangedBehaviorWarning)
            prior = 'identity'
        else:
            prior = self.prior
        _, prior_inv = _initialize_metric_mahalanobis(
            pairs,
            prior,
            return_inverse=True,
            strict_pd=True,
            matrix_name='prior',
            random_state=self.random_state)
        diff = pairs[:, 0] - pairs[:, 1]
        loss_matrix = (diff.T * y).dot(diff)
        emp_cov = prior_inv + self.balance_param * loss_matrix

        # our initialization will be the matrix with emp_cov's eigenvalues,
        # with a constant added so that they are all positive (plus an epsilon
        # to ensure definiteness). This is empirical.
        w, V = np.linalg.eigh(emp_cov)
        min_eigval = np.min(w)
        if min_eigval < 0.:
            warnings.warn(
                "Warning, the input matrix of graphical lasso is not "
                "positive semi-definite (PSD). The algorithm may diverge, "
                "and lead to degenerate solutions. "
                "To prevent that, try to decrease the balance parameter "
                "`balance_param` and/or to set prior='identity'.",
                ConvergenceWarning)
            w -= min_eigval  # we translate the eigenvalues to make them all positive
        w += 1e-10  # we add a small offset to avoid definiteness problems
        sigma0 = (V * w).dot(V.T)
        try:
            if HAS_SKGGM:
                theta0 = pinvh(sigma0)
                M, _, _, _, _, _ = quic(emp_cov,
                                        lam=self.sparsity_param,
                                        msg=self.verbose,
                                        Theta0=theta0,
                                        Sigma0=sigma0)
            else:
                _, M = graphical_lasso(emp_cov,
                                       alpha=self.sparsity_param,
                                       verbose=self.verbose,
                                       cov_init=sigma0)
            raised_error = None
            w_mahalanobis, _ = np.linalg.eigh(M)
            not_spd = any(w_mahalanobis < 0.)
            not_finite = not np.isfinite(M).all()
        except Exception as e:
            raised_error = e
            not_spd = False  # not_spd not applicable here so we set to False
            not_finite = False  # not_finite not applicable here so we set to False
        if raised_error is not None or not_spd or not_finite:
            msg = ("There was a problem in SDML when using {}'s graphical "
                   "lasso solver."
                   ).format("skggm" if HAS_SKGGM else "scikit-learn")
            if not HAS_SKGGM:
                skggm_advice = (
                    " skggm's graphical lasso can sometimes converge "
                    "on non SPD cases where scikit-learn's graphical "
                    "lasso fails to converge. Try to install skggm and "
                    "rerun the algorithm (see the README.md for the "
                    "right version of skggm).")
                msg += skggm_advice
            if raised_error is not None:
                msg += " The following error message was thrown: {}.".format(
                    raised_error)
            raise RuntimeError(msg)

        self.components_ = components_from_metric(np.atleast_2d(M))
        return self
Example #19
    def fit_sklearn(self):
        return graphical_lasso(emp_cov=self.emp_cov, alpha=self.alpha)
Example #20
    def fit(self, reltol=1e-5, max_itr=1000, verbose=True):

        nr_itr = 0
        has_not_converged = True
        Theta0 = self.Theta_init.copy()
        mu0 = self.mu_init
        tau = np.zeros(self.N)
        tol = np.inf
        while has_not_converged and nr_itr < max_itr:
            if verbose:
                print(f'{nr_itr} / {max_itr}, tol={tol}')

            if self.mu_zero:
                for t in range(self.N):
                    # NB: the textbook EM weight for a multivariate t uses the
                    # dimension p, not the sample size N, in the numerator
                    tau[t] = (self.nu + self.N) / (self.nu + np.dot(
                        self.x[t, :], Theta0).dot(self.x[t, :]))

                # S_hat = np.zeros((self.p, self.p))
                # for i in range(self.N):
                #     S_hat += np.outer(self.x[i,:], self.x[i,:]) * tau[i]

                S_hat = np.array([
                    np.outer(self.x[i, :], self.x[i, :]) * tau[i]
                    for i in range(self.N)
                ]).sum(0)
                # print(S_hat[:5,:5])

                _, Theta_t = graphical_lasso(S_hat, self.alpha)

                tol = np.linalg.norm(Theta_t - Theta0, 'fro') / np.linalg.norm(
                    Theta0, 'fro')
                has_not_converged = (tol > reltol)
                Theta0 = Theta_t.copy()
            else:
                for t in range(self.N):
                    tau[t] = (self.nu + self.N) / (self.nu + np.dot(
                        self.x[t, :] - mu0, Theta0).dot(self.x[t, :] - mu0))

                sum_tau = np.sum(tau)

                # weighted location update: tau-weighted mean over the N samples
                mu_hat = np.array(
                    [tau[i] * self.x[i, :]
                     for i in range(self.N)]).sum(0) / sum_tau
                S_hat = np.array([
                    np.outer(self.x[i, :] - mu_hat, self.x[i, :] - mu_hat) *
                    tau[i] for i in range(self.N)
                ]).sum(0)

                _, Theta_t = graphical_lasso(S_hat, self.alpha)
                tol = np.linalg.norm(Theta_t - Theta0, 'fro') / np.linalg.norm(
                    Theta0, 'fro')
                has_not_converged = (tol > reltol)

                Theta0 = Theta_t.copy()
                mu0 = mu_hat.copy()

            nr_itr += 1

        return ((self.nu - 2.0) / self.nu) * Theta_t
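For reference, the textbook E-step weight for a multivariate t uses the dimension p rather than the sample size N: tau_t = (nu + p) / (nu + delta_t), where delta_t is the squared Mahalanobis distance. A tiny numeric sketch of that formula (not the exact expression used by the class above):

import numpy as np

rng = np.random.RandomState(0)
p, nu = 4, 5.0
Theta = np.eye(p)                # current precision estimate
x_t = rng.randn(p)               # one centered observation
delta_t = x_t @ Theta @ x_t      # squared Mahalanobis distance
tau_t = (nu + p) / (nu + delta_t)
print(tau_t)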
Example #21
def markov_network(sigma2, k, lambda_vec, tol=10**-14, opt=False):
    """ For details, see here.

    Parameters
    ----------
        sigma2 : array, shape(n_, n_)
        k : scalar
        lambda_vec : array, shape(l_,)
        tol : scalar
        opt : bool

    Returns
    ----------
        sigma2_bar : array, shape(n_, n_, l_)
        c2_bar : array, shape(n_, n_, l_)
        phi2_bar : array, shape(n_, n_, l_)
        lambda_bar : scalar
        conv : scalar
        l_bar : scalar

    """

    lambda_vec = np.sort(lambda_vec)

    l_ = len(lambda_vec)

    c2_bar = np.zeros(sigma2.shape + (l_, ))
    phi2_bar = np.zeros(sigma2.shape + (l_, ))
    z = np.zeros(l_)

    # Compute correlation
    c2, sigma_vec = cov_2_corr(sigma2)

    for l in range(l_):
        lam = lambda_vec[l]

        # perform glasso shrinkage
        _, invs2_tilde, *_ = graphical_lasso(c2, lam)

        # correlation extraction
        c2_tilde = np.linalg.solve(invs2_tilde, np.eye(invs2_tilde.shape[0]))
        c2_bar[:, :, l] = cov_2_corr(c2_tilde)[0]  # estimated corr.

        # inv. corr.
        phi2_bar[:, :, l] = np.linalg.solve(c2_bar[:, :, l],
                                            np.eye(c2_bar[:, :, l].shape[0]))

        tmp = abs(phi2_bar[:, :, l])
        z[l] = np.sum(tmp < tol)

    # selection
    index = list(np.where(z >= k)[0])
    if len(index) == 0:
        index.append(l_ - 1)  # fall back to the largest lambda
        conv = 0  # target of k null entries not reached
    else:
        conv = 1  # target of k null entries reached
    l_bar = index[0]
    lambda_bar = lambda_vec[l_bar]

    # output
    if not opt:
        c2_bar = c2_bar[:, :, l_bar]  # shrunk correlation
        phi2_bar = phi2_bar[:, :, l_bar]  # shrunk inverse correlation
        l_bar = None
        # shrunk covariance
        sigma2_bar = np.diag(sigma_vec) @ c2_bar @ np.diag(sigma_vec)
    else:
        sigma2_bar = np.zeros(sigma2.shape + (l_, ))
        for l in range(l_):
            sigma2_bar[:, :, l] = np.diag(sigma_vec) @ c2_bar[:, :, l] @ \
                                  np.diag(sigma_vec)

    return sigma2_bar, c2_bar, phi2_bar, lambda_bar, conv, l_bar
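A hedged usage sketch: markov_network relies on a module-level cov_2_corr helper (from the ARPM library), so the stand-in below is an assumption that matches the signature used inside the function; pasting both into one module makes the call runnable.

import numpy as np
from sklearn.covariance import graphical_lasso

def cov_2_corr(sigma2):
    # assumed stand-in: returns (correlation matrix, vector of volatilities)
    sigma_vec = np.sqrt(np.diag(sigma2))
    return sigma2 / np.outer(sigma_vec, sigma_vec), sigma_vec

rng = np.random.RandomState(0)
sigma2 = np.cov(rng.randn(300, 5).T)
sigma2_bar, c2_bar, phi2_bar, lambda_bar, conv, _ = markov_network(
    sigma2, k=4, lambda_vec=np.array([0.01, 0.05, 0.1, 0.5]))
print(lambda_bar, conv)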
Example #22
    def fit(self, X, y):
        """Fit the QDA to the training data"""
        methods = [
            None, 'nonpara', "fr", "kl", "mean", "wass", "reg", "freg",
            "sparse", "kl_new"
        ]
        rules = ["qda", "da", "fda"]
        if self.method not in methods:
            raise ValueError("method must be in {}; got (method={})".format(
                methods, self.method))
        if self.rule not in rules:
            raise ValueError("rule must be in {}; got (rule={})".format(
                rules, self.rule))
        X, y = check_X_y(X, y)

        self.labels_, self.n_samples_ = np.unique(y, return_counts=True)
        self.n_class_ = self.labels_.size
        n_samples, self.n_features_ = X.shape
        self.rho_ = np.array([self.rho]).ravel()
        if self.rho == -1:
            chi_quantile = chi2.ppf(
                0.5,
                self.n_features_ * (self.n_features_ + 3) / 2)
            self.rho_ = chi_quantile * np.ones(self.n_class_) / self.n_samples_
        else:
            if self.rho_.size == 1:
                self.rho_ = self.rho_[0] * np.ones(self.n_class_)
            if self.adaptive:
                self.rho_ *= np.sqrt(self.n_features_)
        if self.priors is None:
            self.priors_ = self.n_samples_ / n_samples
        else:
            self.priors_ = self.priors

        self.mean_ = []
        self.covariance_ = []
        self.cov_sqrt_ = []
        self.prec_ = []
        self.prec_sqrt_ = []
        self.logdet_ = []
        self.rotations_ = []
        self.scalings_ = []
        for n_c, label in enumerate(self.labels_):
            mask = (y == label)
            X_c = X[mask, :]
            X_c_mean = np.mean(X_c, 0)
            X_c_bar = X_c - X_c_mean
            U, s, Vt = np.linalg.svd(X_c_bar, full_matrices=False)
            s2 = (s**2) / (len(X_c_bar) - 1)
            self.mean_.append(X_c_mean)
            if self.method == 'reg':
                s2 += self.rho_[n_c]
                inv_s2 = 1 / s2
            elif self.method in [
                    'fr', 'kl', 'mean', 'freg', 'kl_new', 'nonpara'
            ]:
                sc = StandardScaler()
                X_c_ = sc.fit_transform(X_c)
                cov_c = ledoit_wolf(X_c_)[0]
                cov_c = sc.scale_[:, np.newaxis] * cov_c * sc.scale_[
                    np.newaxis, :]
                s2, V = np.linalg.eigh(cov_c)
                s2 = np.abs(s2)
                inv_s2 = 1 / s2
                Vt = V.T
            elif self.method == 'sparse':
                try:
                    # the fitted attribute is `covariance_` (one trailing underscore)
                    cov_c = GraphicalLasso(alpha=self.rho_[n_c]).fit(X_c_bar).covariance_
                except Exception:
                    tol = self.tol * 1e6
                    cov_c = graphical_lasso(
                        np.dot(((1 - tol) * s2 + tol) * Vt.T, Vt),
                        self.rho_[n_c])[0]
                s2, V = np.linalg.eigh(cov_c)
                s2 = np.abs(s2)
                inv_s2 = 1 / s2
                Vt = V.T
            elif self.method == 'wass':
                f = lambda gamma: gamma * (self.rho_[n_c] ** 2 - 0.5 * np.sum(s2)) - self.n_features_ + \
                                  0.5 * (np.sum(np.sqrt((gamma ** 2) * (s2 ** 2) + 4 * s2 * gamma)))
                lb = 0
                gamma_0 = 0
                ub = np.sum(np.sqrt(1 / (s2 + self.tol))) / self.rho_[n_c]
                f_ub = f(ub)
                for bsect in range(100):
                    gamma_0 = 0.5 * (ub + lb)
                    f_gamma_0 = f(gamma_0)
                    if f_ub * f_gamma_0 > 0:
                        ub = gamma_0
                        f_ub = f_gamma_0
                    else:
                        lb = gamma_0
                    if abs(ub - lb) < self.tol:
                        break
                inv_s2 = gamma_0 * (1 - 2 / (1 + np.sqrt(1 + 4 /
                                                         (gamma_0 *
                                                          (s2 + self.tol)))))
                s2 = 1 / (inv_s2 + self.tol)
            else:
                s2 += self.tol
                inv_s2 = 1 / s2
            self.covariance_.append(np.dot(s2 * Vt.T, Vt))
            self.cov_sqrt_.append(np.dot(np.sqrt(s2) * Vt.T, Vt))
            self.prec_.append(np.dot(inv_s2 * Vt.T, Vt))
            self.prec_sqrt_.append(np.dot(np.sqrt(inv_s2) * Vt.T, Vt))
            self.logdet_.append(np.log(s2).sum())
            #print(self.logdet_)
            self.rotations_.append(Vt)
            self.scalings_.append(s2)
        return self
Example #24
def solve_glasso2(cov, lambda_):
    return graphical_lasso(cov, lambda_)[1]
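A quick check of the one-line wrapper on a toy covariance:

import numpy as np
from sklearn.covariance import graphical_lasso

rng = np.random.RandomState(0)
cov = np.cov(rng.randn(100, 4).T)
prec = solve_glasso2(cov, 0.1)   # sparse precision (inverse covariance)
print(np.round(prec, 3))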
# (a)(ii) Observations
# All covariances are positive, with smallest value 3.63e-05
# Many pairwise precisions are near 0, but not exactly 0
# Suggests that the underlying undirected graphical model is not sparse


# (b) Graphical Lasso
# As alpha increases, the covariance entries shrink towards 0
# and the estimated graph becomes sparser (fewer non-zero precision entries)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.covariance import graphical_lasso


penalized_covs, penalized_precs = {}, {}
for alpha in [1e-5, 1e-4, 1e-3]:
    penalized_cov, penalized_prec = graphical_lasso(
        emp_cov=sample_cov,
        alpha=alpha,
        max_iter=1000)
    penalized_covs[alpha] = penalized_cov
    penalized_precs[alpha] = penalized_prec
    print(f'Number of Non-Zero Edges (alpha = {alpha}): {np.sum(penalized_prec != 0.)}', )

fig, axes = plt.subplots(nrows=1, ncols=2)
axes[0].set_xlabel('Pairwise Covariances')
axes[1].set_xlabel('Pairwise Precisions')
for alpha, penalized_cov in penalized_covs.items():
    axes[0].hist(penalized_cov.flatten(),
                 label=r'$\alpha = $' + str(alpha),
                 bins=50)
    axes[1].hist(penalized_precs[alpha].flatten(),
                 label=r'$\alpha = $' + str(alpha),
                 bins=50)
for ax in axes:
    ax.legend()  # the label=... kwargs need an explicit legend call
plt.show()
Example #26
def get_alpha_max(X, observation, sigma_min, pb_name, alpha_Sigma_inv=None):
    """Compute alpha_max specific to pb_name.

    Parameters
    ----------
    X: np.array, shape (n_channels, n_sources)
    observation: np.array, shape (n_channels, n_times) or
        (n_epochs, n_channels, n_times)
    sigma_min: float, >0
    pb_name: string, one of "SGCL", "CLAR", "NNCVX", "MTL", "MTLME",
        "mrce", "glasso"

    Returns
    -------
    float
        alpha_max of the optimization problem.
    """
    n_channels, n_times = observation.shape[-2], observation.shape[-1]

    if observation.ndim == 3:
        Y = observation.mean(axis=0)
    else:
        Y = observation

    if pb_name == "MTL":
        n_channels, n_times = Y.shape
        alpha_max = l_2_inf(X.T @ Y) / (n_times * n_channels)
    elif pb_name == "MTLME":
        observations = observation.transpose((1, 0, 2))
        observations = observations.reshape(observations.shape[0], -1)
        alpha_max = get_alpha_max(X, observations, sigma_min, "MTL")
    elif pb_name == "SGCL":
        assert observation.ndim == 2
        _, S_max_inv = clp_sqrt(Y @ Y.T / n_times, sigma_min)
        alpha_max = l_2_inf(X.T @ S_max_inv @ Y)
        alpha_max /= (n_channels * n_times)
    elif pb_name == "CLAR" or pb_name == "NNCVX":
        n_epochs = observation.shape[0]
        cov_Yl = 0
        for l in range(n_epochs):
            cov_Yl += observation[l, :, :] @ observation[l, :, :].T
        cov_Yl /= (n_epochs * n_times)
        _, S_max_inv = clp_sqrt(cov_Yl, sigma_min)
        alpha_max = l_2_inf(X.T @ S_max_inv @ Y)
        alpha_max /= (n_channels * n_times)
    elif pb_name == "mrce":
        assert alpha_Sigma_inv is not None
        if observation.ndim == 3:
            emp_cov = get_emp_cov(observation)
            Sigma, Sigma_inv = graphical_lasso(emp_cov,
                                               alpha_Sigma_inv,
                                               max_iter=10**6)
            alpha_max = l_2_inf(X.T @ Sigma_inv @ Y) / (n_channels * n_times)
        else:
            # 2D observations: per-time empirical covariance and max-abs scaling
            emp_cov = observation @ observation.T / n_times
            Sigma, Sigma_inv = graphical_lasso(emp_cov,
                                               alpha_Sigma_inv,
                                               max_iter=10**6)
            alpha_max = np.abs(X.T @ Sigma_inv @ Y).max() / (n_channels * n_times)
    elif pb_name == "glasso":
        assert observation.ndim == 2
        assert alpha_Sigma_inv is not None
        emp_cov = observation @ observation.T / n_times
        Sigma, Sigma_inv = graphical_lasso(emp_cov, alpha_Sigma_inv)
        alpha_max = l_2_inf(X.T @ Sigma_inv @ Y) / (n_channels * n_times)
    else:
        raise NotImplementedError("No solver '{}' in sgcl".format(pb_name))
    return alpha_max
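A hedged sketch for the "glasso" problem: l_2_inf is imported from the surrounding package, so the stand-in below (largest row-wise Euclidean norm) is an assumption; define it in the same module before calling.

import numpy as np
from sklearn.covariance import graphical_lasso

def l_2_inf(A):
    # assumed stand-in: largest row-wise Euclidean norm
    return np.sqrt((A ** 2).sum(axis=1)).max()

rng = np.random.RandomState(0)
X = rng.randn(10, 20)              # (n_channels, n_sources)
observation = rng.randn(10, 500)   # (n_channels, n_times)
amax = get_alpha_max(X, observation, sigma_min=1e-3,
                     pb_name="glasso", alpha_Sigma_inv=0.5)
print(amax)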
Example #27
def fit_lfm_roblasso(x,
                     z,
                     p=None,
                     nu=1e9,
                     lambda_beta=0.,
                     lambda_phi=0.,
                     tol=1e-3,
                     fit_intercept=True,
                     maxiter=500,
                     print_iter=False,
                     rescale=False):
    """For details, see here.

    Parameters
    ----------
        x : array, shape(t_, n_)
        z : array, shape(t_, k_)
        p : array, optional, shape(t_)
        nu : scalar, optional
        lambda_beta : scalar, optional
        lambda_phi : scalar, optional
        tol : float, optional
        fit_intercept: bool, optional
        maxiter : scalar, optional
        print_iter : bool, optional
        rescale : bool, optional

    Returns
    -------
       alpha_RMLFP : array, shape(n_,)
       beta_RMLFP : array, shape(n_,k_)
       sig2_RMLFP : array, shape(n_,n_)

    """

    if len(x.shape) == 1:
        x = x.reshape(-1, 1)

    if len(z.shape) == 1:
        z = z.reshape(-1, 1)

    t_, n_ = x.shape

    if p is None:
        p = np.ones(t_) / t_

    # rescale the variables
    if rescale is True:
        _, sigma2_x = meancov_sp(x, p)
        sigma_x = np.sqrt(np.diag(sigma2_x))
        x = x / sigma_x

        _, sigma2_z = meancov_sp(z, p)
        sigma_z = np.sqrt(np.diag(sigma2_z))
        z = z / sigma_z

    # Step 0: Set initial values using method of moments

    alpha, beta, sigma2, u = fit_lfm_lasso(x,
                                           z,
                                           p,
                                           lambda_beta,
                                           fit_intercept=fit_intercept)
    mu_u = np.zeros(n_)

    for i in range(maxiter):

        # Step 1: Update the weights

        if nu >= 1e3 and np.linalg.det(sigma2) < 1e-13:
            w = np.ones(t_)
        else:
            w = (nu + n_) / (nu + mahalanobis_dist(u, mu_u, sigma2)**2)
        q = w * p
        q = q / np.sum(q)

        # Step 2: Update location and dispersion parameters

        alpha_old, beta_old = alpha, beta
        alpha, beta, sigma2, u = fit_lfm_lasso(x,
                                               z,
                                               q,
                                               lambda_beta,
                                               fit_intercept=fit_intercept)
        sigma2, _ = graphical_lasso((w @ p) * sigma2, lambda_phi)

        # Step 3: Check convergence

        errors = [
            np.linalg.norm(alpha - alpha_old, ord=np.inf) /
            max(np.linalg.norm(alpha_old, ord=np.inf), 1e-20),
            np.linalg.norm(beta - beta_old, ord=np.inf) /
            max(np.linalg.norm(beta_old, ord=np.inf), 1e-20)
        ]

        # print the loglikelihood and the error
        if print_iter is True:
            print('Iter: %i; Loglikelihood: %.5f; Errors: %.5f' %
                  (i, p @ mvt_logpdf(u, mu_u, sigma2, nu) -
                   lambda_beta * np.linalg.norm(beta, ord=1), max(errors)))

        if max(errors) <= tol:
            break

    if rescale is True:
        alpha = alpha * sigma_x
        beta = ((beta / sigma_z).T * sigma_x).T
        sigma2 = (sigma2.T * sigma_x).T * sigma_x

    return alpha, beta, sigma2
Example #28
    def fit_CV(self):
        """
        Test many alphas and pick the one with the best EBIC.
        """
        ebic_vals = []
        prec_list = []
        cov_list = []
        alpha_list = []

        alpha = 0.5
        max_itr = 1000
        best_not_found = True
        nr_iterations = 0
        best_ebic = np.inf
        time_since_last_min = 0
        not_all_sparse = True

        while (best_not_found) and (nr_iterations <
                                    max_itr) and (not_all_sparse):
            # print(f'{nr_iterations} {alpha}')
            alpha_list.append(alpha)

            try:
                out_cov, out_prec = graphical_lasso(
                    emp_cov=self.emp_cov.copy(), alpha=alpha)
            except (FloatingPointError, ValueError):
                # record the failed fit and move on to the next (larger) alpha
                ebic_vals.append(np.inf)
                prec_list.append(np.inf)
                cov_list.append(np.inf)
                alpha += 0.01
                nr_iterations += 1
                continue

            ebic_t = EBIC(self.N, self.emp_cov, out_prec, beta=self.beta)
            print(
                f'{nr_iterations} {alpha} {ebic_t} {self.N * gaussian_likelihood(self.emp_cov, out_prec)}'
            )
            ebic_vals.append(ebic_t)

            time_since_last_min += 1
            if ebic_t < best_ebic:
                best_ebic = ebic_t
                time_since_last_min = 0
                best_alpha = alpha

            prec_list.append(out_prec)
            cov_list.append(out_cov)

            if time_since_last_min > 50:
                best_not_found = False

            alpha += 0.01
            nr_iterations += 1
            not_all_sparse = (np.count_nonzero(np.triu(out_prec, 1)) != 0)

        # np.argmin always returns a scalar index, so no array case is needed
        best_idx = np.argmin(ebic_vals)
        best_prec = prec_list[best_idx]
        best_alpha = alpha_list[best_idx]

        return best_prec, prec_list, cov_list, ebic_vals, best_alpha, alpha_list
Example #29
    def fit(
        self,
        TS,
        alpha=0.01,
        max_iter=100,
        tol=0.0001,
        threshold_type='degree',
        **kwargs
    ):
        """Performs a graphical lasso.

        For details see [1, 2].

        The results dictionary also stores the covariance matrix as
        `'weights_matrix'`, the precision matrix as `'precision_matrix'`,
        and the thresholded version of the covariance matrix as
        `'thresholded_matrix'`.

        This implementation uses `scikit-learn`'s implementation of the
        graphical lasso; for convenience two control parameters `tol` and
        `max_iter` are available to interface with their method.

        Parameters
        ----------

        TS (np.ndarray)
            Array consisting of :math:`L` observations from :math:`N`
            sensors.

        alpha (float, default=0.01)
            Coefficient of penalization, higher values means more
            sparseness

        max_iter (int, default=100)
            Maximum number of iterations.

        tol (float, default=0.0001)
            Stop the algorithm when the duality gap is below a certain
            threshold.

        threshold_type (str)
            Which thresholding function to use on the matrix of
            weights. See `netrd.utilities.threshold.py` for
            documentation. Pass additional arguments to the thresholder
            using ``**kwargs``.

        Returns
        -------

        G (nx.Graph)
            A reconstructed graph with :math:`N` nodes.

        References
        ----------

        .. [1] J. Friedman, T. Hastie, R. Tibshirani, "Sparse inverse
               covariance estimation with the graphical lasso",
               Biostatistics 9, pp. 432–441 (2008).

        .. [2] https://github.com/CamDavidsonPilon/Graphical-Lasso-in-Finance

        """
        emp_cov = np.cov(TS)

        cov, prec = graphical_lasso(emp_cov, alpha, max_iter=max_iter, tol=tol)
        self.results['weights_matrix'] = cov
        self.results['precision_matrix'] = prec

        # threshold the network
        self.results['thresholded_matrix'] = threshold(
            self.results['weights_matrix'], threshold_type, **kwargs
        )

        # construct the network
        G = create_graph(self.results['thresholded_matrix'])
        self.results['graph'] = G

        return G
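Assuming this method belongs to netrd's GraphicalLasso reconstructor (the import path below is an assumption), usage would look roughly like:

import numpy as np
from netrd.reconstruction import GraphicalLasso  # assumed import path

TS = np.random.RandomState(0).randn(10, 500)  # N=10 sensors, L=500 observations
G = GraphicalLasso().fit(TS, alpha=0.05)
print(G.number_of_nodes())  # 10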
Example #30
def run_samples_lasso(N, B, alpha, theta1, theta2, s1, s2):
    import myKernels.RandomWalk as rw
    test_info = pd.DataFrame()
    k = theta1.shape[0]
    for sample in tqdm.tqdm(range(N)):

        Gs1 = []
        Gs2 = []
        error_1 = []
        error_2 = []
        n = 50

        for i in range(50):
            x1 = np.random.multivariate_normal(mean=np.zeros(k),
                                               cov=theta1,
                                               size=100)
            A1 = np.corrcoef(x1.T)
            if alpha == 0:
                np.fill_diagonal(A1, 0)
                A1[np.abs(A1) < 1e-5] = 0
            else:
                gl = graphical_lasso(A1, alpha=alpha, max_iter=1000)
                A1 = gl[0]
                A1[np.abs(A1) < 1e-5] = 0
                np.fill_diagonal(A1, 0)

            Gs1.append(nx.from_numpy_matrix(A1))
            error_1.append(
                np.sum(
                    np.logical_xor(
                        np.abs(np.triu(A1, 1)) > 0,
                        np.abs(np.triu(theta1, 1)) > 0)))
            x2 = np.random.multivariate_normal(mean=np.zeros(k),
                                               cov=theta2,
                                               size=100)
            A2 = np.corrcoef(x2.T)
            if alpha == 0:
                np.fill_diagonal(A2, 0)
                A2[np.abs(A2) < 1e-5] = 0
            else:
                gl = graphical_lasso(A2, alpha=alpha, max_iter=1000)
                A2 = gl[0]
                A2[np.abs(A2) < 1e-5] = 0
                np.fill_diagonal(A2, 0)
            Gs2.append(nx.from_numpy_matrix(A2))
            error_2.append(
                np.sum(
                    np.logical_xor(
                        np.abs(np.triu(A2, 1)) > 0,
                        np.abs(np.triu(theta2, 1)) > 0)))

        Gs = Gs1 + Gs2

        try:
            #rw_kernel = rw.RandomWalk(Gs, c = 0.0001, normalize=0)
            #K = rw_kernel.fit_ARKU_plus(r = 6, normalize_adj=False,   edge_attr= None, verbose=False)
            graph_list = gk.graph_from_networkx(Gs)
            kernel = [{"name": "SP", "with_labels": 0}]
            init_kernel = gk.GraphKernel(kernel=kernel, normalize=0)
            K = init_kernel.fit_transform(graph_list)
        except Exception:
            continue

        MMD_functions = [mg.MMD_b, mg.MMD_u]

        kernel_hypothesis = mg.BoostrapMethods(MMD_functions)
        function_arguments = [dict(n=n, m=n), dict(n=n, m=n)]
        kernel_hypothesis.Bootstrap(K, function_arguments, B=B)
        #print(f'p_value {kernel_hypothesis.p_values}')
        #print(f"MMD_u {kernel_hypothesis.sample_test_statistic['MMD_u']}")

        test_info = pd.concat(
            (test_info,
             pd.DataFrame(
                 {
                     'p_val': kernel_hypothesis.p_values['MMD_u'],
                     'sample': sample,
                     'mean_error_1': np.mean(error_1),
                     'mean_error_2': np.mean(error_2),
                     'alpha': alpha,
                     's1': s1,
                     's2': s2
                 },
                 index=[0])),
            ignore_index=True)

    return test_info