Exemplo n.º 1
0
    def test_MX_knockoff_dist(self):
        """Run ``check_valid_mxknockoffs`` on AR1 and block-equi data.

        The first part samples data from a scaled AR1 covariance with a
        random mean; the second part loops over block-equicorrelated designs
        for several ``rho`` / ``gamma`` values. In both parts the helper is
        called once with the true ``mu`` / ``Sigma`` ("oracle") and once
        without them ("estimated", per the messages passed to the helper).
        """
        np.random.seed(110)
        # Large sample -- presumably so empirical moments are close to the
        # population values the helper compares against (TODO confirm).
        n = 100000
        p = 5

        # Check with a non-correlation matrix (AR1 matrix scaled by 4)
        V = 4 * dgp.AR1(p=p, rho=0.5)
        mu = np.random.randn(p)
        print(f"true mu: {mu}")
        dgprocess = dgp.DGP(mu=mu, Sigma=V)
        X, _, _, _, _ = dgprocess.sample_data(
            n=n,
            p=p,
        )
        print(f"X mean: {X.mean(axis=0)}")

        # Check validity for oracle cov matrix
        self.check_valid_mxknockoffs(
            X,
            mu=mu,
            Sigma=V,
            msg="ORACLE 4*AR1(rho=0.5)",
        )

        # Check validity for estimated cov matrix
        self.check_valid_mxknockoffs(X, msg="ESTIMATED 4*AR1(rho=0.5)")

        # Check for many types of data
        for rho in [0.1, 0.9]:
            for gamma in [0.5, 1]:
                # NOTE(review): `method` is never forwarded to
                # check_valid_mxknockoffs, so each (rho, gamma) setting is
                # just checked twice on freshly sampled data. Confirm
                # whether the helper should receive `method=method`.
                for method in ["mvr", "sdp"]:

                    mu = 10 * np.random.randn(p)
                    X, _, _, _, corr_matrix, _ = dgp.block_equi_graph(
                        n=n, p=p, gamma=gamma, rho=rho, mu=mu)

                    # Check validity for oracle correlation matrix
                    self.check_valid_mxknockoffs(
                        X,
                        mu=mu,
                        Sigma=corr_matrix,
                        msg=f"blockequi graph, rho = {rho}, gamma = {gamma}",
                    )

                    # Check validity for estimation
                    self.check_valid_mxknockoffs(
                        X,
                        msg=
                        f"ESTIMATED blockequi graph, rho = {rho}, gamma = {gamma}",
                    )
Exemplo n.º 2
0
    def test_blockequi_sample(self):
        """Check the default output of ``dgp.block_equi_graph``.

        Verifies that the default covariance matrix equals an independently
        built block-equicorrelated matrix (p=1000, blocks of size 5, rho=0.5,
        gamma=0), and that the default ``beta`` has 100 nonzero entries
        spread over 20 distinct groups of consecutive features.
        """
        # Dimensions the defaults are expected to use; kept as named
        # constants so the checks below do not repeat magic numbers.
        p = 1000
        groupsize = 5

        # Check that defaults are correct - start w cov matrix
        _, _, beta, _, V, _ = dgp.block_equi_graph()

        # Construct expected cov matrix -  this is a different
        # construction than the actual function
        def construct_expected_V(p, groupsize, rho, gamma):
            """Build a p x p matrix with `rho` within blocks and `gamma * rho` between."""
            # Within-group block: rho off the diagonal, 1 on the diagonal
            block = np.full((groupsize, groupsize), float(rho))
            np.fill_diagonal(block, 1.0)
            blocks = [block] * (p // groupsize)
            expected = sp.linalg.block_diag(*blocks)

            # Add gamma between-group correlations. Entries outside the
            # blocks are exactly 0 after block_diag; note this selection
            # relies on rho != 0 so that no in-block entry is 0.
            expected[expected == 0] = gamma * rho
            return expected

        expected = construct_expected_V(
            p=p, groupsize=groupsize, rho=0.5, gamma=0
        )

        # Test equality with actual one
        np.testing.assert_array_almost_equal(
            V, expected, err_msg="Default blockequi cov matrix is incorrect"
        )

        # Check number of nonzero groups
        nonzero_inds = np.arange(p)[beta != 0]
        num_nonzero_groups = np.unique(nonzero_inds // groupsize).shape[0]
        self.assertEqual(
            num_nonzero_groups,
            20,
            msg=f"Default blockequi beta has {num_nonzero_groups} nonzero groups, expected 20",
        )

        # Check number of nonzero features
        num_nonzero_features = (beta != 0).sum()
        self.assertEqual(
            num_nonzero_features,
            100,
            msg=f"Default blockequi beta has {num_nonzero_features} nonzero features, expected 100",
        )
Exemplo n.º 3
0
    def test_FX_knockoff_dist(self):
        """Check the Gram matrix of ``[X, Xk]`` for fixed-X knockoffs.

        For every (rho, gamma, method) combination, samples block-equi data,
        builds knockoffs with ``knockoffs.FXSampler`` and asserts that
        ``[X, Xk]^T [X, Xk]`` equals ``[[Sigma, Sigma - S], [Sigma - S, Sigma]]``
        where ``Sigma = X^T X`` and ``S`` is fetched from the sampler.
        """
        # Test knockoff construction for mvr and SDP
        # on equicorrelated matrices
        n = 500
        p = 5
        for rho in [0.1, 0.9]:
            for gamma in [0.5, 1]:
                for method in ["mvr", "sdp"]:
                    # X values (the other outputs are not needed here)
                    X, _, _, _, _, _ = dgp.block_equi_graph(
                        n=n, p=p, gamma=gamma, rho=rho)

                    # Knockoffs and the S matrix the sampler used
                    ksampler = knockoffs.FXSampler(X=X,
                                                   method=method,
                                                   verbose=False)
                    Xk = ksampler.sample_knockoffs()
                    S = ksampler.fetch_S()

                    # Compute empirical (scaled) cov matrix
                    features = np.concatenate([X, Xk], axis=1)
                    G_hat = np.dot(features.T, features)

                    # Calculate what this should be
                    Sigma = np.dot(X.T, X)
                    off_diag = Sigma - S
                    G = np.block([
                        [Sigma, off_diag],
                        [off_diag, Sigma],
                    ])

                    # Test G has correct structure
                    msg = "Feature-knockoff cov matrix has incorrect values "
                    msg += f"for blockequi graph, FX knockoffs, rho = {rho}, gamma = {gamma}"
                    np.testing.assert_array_almost_equal(
                        G_hat, G, decimal=5, err_msg=msg
                    )
Exemplo n.º 4
0
    def test_error_raising(self):
        """Check that invalid knockoff setups raise ``np.linalg.LinAlgError``.

        Two cases are exercised on block-equi data with n = p = 100:
        sampling Gaussian knockoffs with ``S`` set to the identity, and
        constructing an ``FXSampler``. Each must raise with the expected
        message fragment.
        """
        # Generate data
        n = 100
        p = 100
        X, _, _, _, corr_matrix, _ = dgp.block_equi_graph(
            n=n, p=p, gamma=1, rho=0.8
        )
        identity_S = np.eye(p)

        # Gaussian knockoffs with an identity S matrix
        with self.assertRaisesRegex(
            np.linalg.LinAlgError,
            "meaning FDR control violations are extremely likely",
        ):
            sampler = knockoffs.GaussianSampler(
                X=X, Sigma=corr_matrix, S=identity_S, verbose=False
            )
            sampler.sample_knockoffs()

        # Test FX knockoff violations
        with self.assertRaisesRegex(
            np.linalg.LinAlgError,
            "FX knockoffs can't be generated with n",
        ):
            knockoffs.FXSampler(X=X, Sigma=corr_matrix, S=None)
Exemplo n.º 5
0
    def test_easy_sdp(self):
        """Check SDP / ASDP S-matrices on block-equi designs with known answers.

        With one feature per group, the S matrix from both
        ``smatrix.compute_smatrix`` and ``knockoffs.GaussianSampler`` is
        compared to the identity. With the groups returned by the data
        generator, the S matrix from the "sdp" and "ASDP" methods is compared
        to the covariance matrix generated with ``gamma=0``. Every S matrix
        is also passed to ``check_S_properties``.
        """
        sdp_err = "solve_group_SDP does not produce optimal S matrix (blockequi graphs)"
        asdp_err = "solve_group_ASDP does not produce optimal S matrix (blockequi graphs)"

        # Test non-group SDP first
        n = 200
        p = 50
        X, _, _, _, corr_matrix, groups = dgp.block_equi_graph(
            n=n, p=p, gamma=0.3
        )

        # S matrix with every feature in its own group (labels 1..p)
        singleton_groups = np.arange(0, p, 1) + 1
        S_direct = smatrix.compute_smatrix(
            Sigma=corr_matrix,
            groups=singleton_groups,
            method="sdp",
            verbose=True,
        )
        np.testing.assert_array_almost_equal(
            S_direct, np.eye(p), decimal=2, err_msg=sdp_err
        )
        self.check_S_properties(corr_matrix, S_direct, singleton_groups)

        # Repeat for gaussian_knockoffs method
        sampler = knockoffs.GaussianSampler(
            X=X,
            Sigma=corr_matrix,
            groups=singleton_groups,
            verbose=False,
            method="sdp",
        )
        S_sampler = sampler.fetch_S()
        np.testing.assert_array_almost_equal(
            S_sampler, np.eye(p), decimal=2, err_msg=sdp_err
        )
        self.check_S_properties(corr_matrix, S_sampler, singleton_groups)

        # Test slightly harder case: the expected S is the covariance
        # generated with gamma=0
        _, _, _, _, expected_out, _ = dgp.block_equi_graph(n=n, p=p, gamma=0)
        sampler = knockoffs.GaussianSampler(
            X=X,
            Sigma=corr_matrix,
            groups=groups,
            verbose=False,
            method="sdp",
        )
        S_grouped = sampler.fetch_S()
        np.testing.assert_almost_equal(
            S_grouped, expected_out, decimal=2, err_msg=sdp_err
        )
        self.check_S_properties(corr_matrix, S_grouped, groups)

        # Repeat for ASDP
        sampler = knockoffs.GaussianSampler(
            X=X,
            Sigma=corr_matrix,
            groups=groups,
            method="ASDP",
            verbose=False,
            max_block=10,
        )
        S_grouped_asdp = sampler.fetch_S()
        np.testing.assert_almost_equal(
            S_grouped_asdp, expected_out, decimal=2, err_msg=asdp_err
        )
        self.check_S_properties(corr_matrix, S_grouped_asdp, groups)