Exemplo n.º 1
0
    def test_consistency_of_inferring_sigma(self):
        """Knockoffs must not depend on who estimates the covariance.

        Knockoffs are generated twice from the same data under the same
        numpy seed: once with a covariance matrix estimated up front and
        passed to ``gaussian_knockoffs`` as ``Sigma``, and once with
        ``Sigma`` omitted so the generator infers it itself. The two
        outputs are required to agree to 5 decimal places.
        """
        seed = 110
        X, _, _, _, _ = graphs.sample_data(n=25, p=300, rho=0.5, method='AR1')

        # Path A: the caller estimates the covariance and hands it over.
        sigma_hat, _ = utilities.estimate_covariance(X, tol=1e-2)
        np.random.seed(seed)
        from_explicit_sigma = knockoffs.gaussian_knockoffs(
            X=X,
            Sigma=sigma_hat,
            method='sdp',
            max_epochs=1,
        )

        # Path B: no Sigma supplied. Reseed first so both paths start from
        # the same global random state.
        np.random.seed(seed)
        from_inferred_sigma = knockoffs.gaussian_knockoffs(
            X=X,
            method='sdp',
            max_epochs=1,
        )

        np.testing.assert_array_almost_equal(
            from_explicit_sigma,
            from_inferred_sigma,
            5,
            err_msg='Knockoff gen is inconsistent',
        )
Exemplo n.º 2
0
 def fx_knockoffs_low_n():
     """Request fixed-X Gaussian knockoffs using data from an outer scope.

     Takes no arguments: ``X`` and ``corr_matrix`` are not defined here and
     are looked up in an enclosing scope when the function is called. The
     value returned by ``gaussian_knockoffs`` is discarded.

     NOTE(review): the name suggests this is a callable handed to an
     expected-failure check for too few samples -- confirm at the call site.
     """
     generate = knockoffs.gaussian_knockoffs
     generate(X=X, Sigma=corr_matrix, S=None, fixedX=True)
Exemplo n.º 3
0
    def check_valid_mxknockoffs(self,
                                X,
                                mu=None,
                                Sigma=None,
                                msg='',
                                **kwargs):
        """Assert that generated knockoffs have the expected first two moments.

        Knockoffs are generated with ``knockoffs.gaussian_knockoffs`` and
        two checks are made, each to 2 decimal places:

        1. the column means of the knockoffs match ``mu``;
        2. the empirical covariance of the concatenated ``[X, Xk]`` matches
           the block matrix ``[[Sigma, Sigma - S], [Sigma - S, Sigma]]``
           after everything is rescaled by ``sqrt(diag(Sigma))``.

        Parameters
        ----------
        X : array
            Data matrix; columns are treated as features (means are taken
            over axis 0).
        mu : array, optional
            Expected knockoff means. Defaults to the column means of ``X``.
        Sigma : array, optional
            Covariance matrix. When omitted it is estimated from ``X`` with
            ``utilities.estimate_covariance(X, tol=1e-2)``.
        msg : str
            Label interpolated into the assertion failure messages.
        **kwargs
            Forwarded unchanged to ``knockoffs.gaussian_knockoffs``.

        Raises
        ------
        AssertionError
            If either comparison fails.
        """

        # Generate the knockoffs together with the S matrix that was used
        all_knockoffs, S = knockoffs.gaussian_knockoffs(X=X,
                                                        mu=mu,
                                                        Sigma=Sigma,
                                                        return_S=True,
                                                        verbose=True,
                                                        **kwargs)

        # Only the slice at the last index of the third axis is checked
        Xk = all_knockoffs[:, :, -1]

        # Test knockoff mean
        if mu is None:
            mu = X.mean(axis=0)
        outmsg = "Knockoffs have incorrect means "
        outmsg += f"for MX knockoffs for {msg}"
        np.testing.assert_array_almost_equal(Xk.mean(axis=0), mu, 2, outmsg)

        # Fall back to an estimated covariance when none was supplied
        if Sigma is None:
            Sigma, _ = utilities.estimate_covariance(X, tol=1e-2)

        # Rescale X, Xk, Sigma and S by the same per-feature factor so that
        # S is comparable with the empirical covariance computed below.
        # (mu is not rescaled: it is not used past the mean check above.)
        scale = np.sqrt(np.diag(Sigma))
        pairwise_scale = np.outer(scale, scale)
        X = X / scale.reshape(1, -1)
        Xk = Xk / scale.reshape(1, -1)
        Sigma = Sigma / pairwise_scale
        S = S / pairwise_scale

        # Empirical feature-knockoff covariance matrix
        features = np.concatenate([X, Xk], axis=1)
        G_hat = np.cov(features.T)

        # Population version: inner concatenations stack vertically, the
        # outer one joins the two column blocks side by side
        G = np.concatenate([
            np.concatenate([Sigma, Sigma - S]),
            np.concatenate([Sigma - S, Sigma])
        ],
                           axis=1)

        # Test G has correct structure
        outmsg = "Feature-knockoff cov matrix has incorrect values "
        outmsg += f"for MX knockoffs for {msg} graph "
        np.testing.assert_array_almost_equal(G_hat, G, 2, outmsg)
Exemplo n.º 4
0
    def test_FX_knockoff_dist(self):
        """Check the Gram matrix of ``[X, Xk]`` for fixed-X knockoffs.

        For every combination of ``rho``, ``gamma`` and S-matrix ``method``
        ('mvr' and 'sdp'), data is drawn from
        ``graphs.daibarber2016_graph`` and fixed-X knockoffs are generated.
        After dividing each column by its Euclidean norm, the Gram matrix
        of the concatenated features must equal
        ``[[Sigma, Sigma - S], [Sigma - S, Sigma]]`` with
        ``Sigma = X.T @ X``, to 5 decimal places.
        """
        n = 500
        p = 5
        for rho in [0.1, 0.9]:
            for gamma in [0.5, 1]:
                for method in ['mvr', 'sdp']:
                    # Fresh X for every configuration (X is rescaled below)
                    X, _, _, _, _, _ = graphs.daibarber2016_graph(
                        n=n, p=p, gamma=gamma, rho=rho)

                    # Knockoffs and the S matrix used to build them;
                    # copies is 1 for gamma = 0.5 and 2 for gamma = 1
                    all_knockoffs, S = knockoffs.gaussian_knockoffs(
                        X=X,
                        fixedX=True,
                        copies=int(gamma) + 1,
                        method=method,
                        return_S=True,
                        verbose=False)

                    # Scale by the column norms so that X.T @ X has a unit
                    # diagonal; S is rescaled by the matching outer product
                    scale = np.sqrt(np.diag(np.dot(X.T, X)).reshape(1, -1))
                    X = X / scale
                    knockoff_copy = all_knockoffs[:, :, -1] / scale
                    S = S / np.outer(scale, scale)

                    # Empirical (scaled) Gram matrix of [X, Xk]
                    features = np.concatenate([X, knockoff_copy], axis=1)
                    G_hat = np.dot(features.T, features)

                    # Calculate what this should be
                    Sigma = np.dot(X.T, X)
                    G = np.concatenate([
                        np.concatenate([Sigma, Sigma - S]),
                        np.concatenate([Sigma - S, Sigma])
                    ],
                                       axis=1)

                    # Test G has correct structure; the message names every
                    # loop variable so a failure identifies its configuration
                    msg = "Feature-knockoff cov matrix has incorrect values "
                    msg += (f"for daibarber graph, FX knockoffs, rho = {rho}, "
                            f"gamma = {gamma}, method = {method}")
                    np.testing.assert_array_almost_equal(G_hat, G, 5, msg)
Exemplo n.º 5
0
 def fdr_vio_knockoffs():
     """Request Gaussian knockoffs with the outer scope's ``S_bad`` matrix.

     Takes no arguments: ``X``, ``corr_matrix`` and ``S_bad`` are not
     defined here and are looked up in an enclosing scope when the function
     is called. The value returned by ``gaussian_knockoffs`` is discarded.

     NOTE(review): the names suggest ``S_bad`` is a deliberately invalid S
     matrix and that this callable is expected to raise -- confirm at the
     call site.
     """
     generate = knockoffs.gaussian_knockoffs
     generate(X=X, Sigma=corr_matrix, S=S_bad, verbose=False)
Exemplo n.º 6
0
    def test_easy_sdp(self):
        """Check SDP / ASDP S matrices on daibarber2016 graphs.

        Four S matrices are computed from the same correlation matrix
        (graph sampled with ``gamma=0.3``) and each is compared to a known
        target at 2 decimal places, then passed to
        ``self.check_S_properties``:

        * ungrouped SDP via ``knockoffs.compute_S_matrix`` -> identity;
        * ungrouped SDP via ``knockoffs.gaussian_knockoffs`` -> identity;
        * grouped SDP -> correlation matrix of the ``gamma=0`` graph;
        * grouped ASDP (``max_block=10``) -> the same ``gamma=0`` target.
        """
        n, p = 200, 50
        sdp_failure = (
            'solve_group_SDP does not produce optimal S matrix (daibarber graphs)'
        )
        asdp_failure = (
            'solve_group_ASDP does not produce optimal S matrix (daibarber graphs)'
        )

        X, _, _, _, corr_matrix, groups = graphs.daibarber2016_graph(
            n=n, p=p, gamma=0.3)

        # --- Ungrouped case: group labels 1..p, one label per feature ---
        singleton_groups = np.arange(0, p, 1) + 1
        identity = np.eye(p)

        S_direct = knockoffs.compute_S_matrix(Sigma=corr_matrix,
                                              groups=singleton_groups,
                                              method='sdp',
                                              verbose=True)
        np.testing.assert_array_almost_equal(S_direct,
                                             identity,
                                             decimal=2,
                                             err_msg=sdp_failure)
        self.check_S_properties(corr_matrix, S_direct, singleton_groups)

        # Same check, this time going through gaussian_knockoffs
        _, S_via_generator = knockoffs.gaussian_knockoffs(
            X=X,
            Sigma=corr_matrix,
            groups=singleton_groups,
            return_S=True,
            verbose=False,
            method='sdp')
        np.testing.assert_array_almost_equal(S_via_generator,
                                             identity,
                                             decimal=2,
                                             err_msg=sdp_failure)
        self.check_S_properties(corr_matrix, S_via_generator,
                                singleton_groups)

        # --- Grouped case: the target is the gamma=0 correlation matrix ---
        _, _, _, _, grouped_target, _ = graphs.daibarber2016_graph(
            n=n, p=p, gamma=0)

        _, S_grouped_sdp = knockoffs.gaussian_knockoffs(
            X=X,
            Sigma=corr_matrix,
            groups=groups,
            return_S=True,
            verbose=False,
            method='sdp')
        np.testing.assert_almost_equal(S_grouped_sdp,
                                       grouped_target,
                                       decimal=2,
                                       err_msg=sdp_failure)
        self.check_S_properties(corr_matrix, S_grouped_sdp, groups)

        # Grouped case again with the ASDP method
        _, S_grouped_asdp = knockoffs.gaussian_knockoffs(
            X=X,
            Sigma=corr_matrix,
            groups=groups,
            method='ASDP',
            return_S=True,
            verbose=False,
            max_block=10)
        np.testing.assert_almost_equal(S_grouped_asdp,
                                       grouped_target,
                                       decimal=2,
                                       err_msg=asdp_failure)
        self.check_S_properties(corr_matrix, S_grouped_asdp, groups)