Exemplo n.º 1
0
def test_sim():
    # Simulation check of the knockoff approach: for every design
    # method and effect tester, the realized FDR and power averaged
    # over the replications are compared against fixed bounds.

    np.random.seed(43234)

    # Number of variables with a non-zero coefficient
    npos = 30

    # Aim to control FDR to this level
    target_fdr = 0.2

    # Number of simulation replications per configuration
    nrep = 10

    # Each entry is [tester, n, p, effect size]
    testers = [[kr.CorrelationEffects(), 300, 100, 6],
               [kr.ForwardEffects(pursuit=False), 300, 100, 3.5],
               [kr.ForwardEffects(pursuit=True), 300, 100, 3.5],
               [kr.OLSEffects(), 3000, 200, 3.5]]

    for method in "equi", "sdp":

        # Skip the SDP design when has_cvxopt is false
        if method == "sdp" and not has_cvxopt:
            continue

        for tester, n, p, es in testers:

            fdr = 0
            power = 0

            for _ in range(nrep):

                # Generate the predictors, scaled to unit-norm columns
                x = np.random.normal(size=(n, p))
                x /= np.sqrt(np.sum(x * x, 0))

                # Only the first npos columns contribute to the response,
                # with coefficients of alternating sign
                coeff = es * (-1)**np.arange(npos)
                y = np.dot(x[:, 0:npos], coeff) + np.random.normal(size=n)

                # Forward the design method; without this the loop over
                # `method` would only repeat the default design.
                kn = RegressionFDR(y, x, tester, design_method=method)

                # Threshold to achieve the target FDR
                tr = kn.threshold(target_fdr)

                # Number of selected coefficients
                cp = np.sum(kn.stats >= tr)

                # Number of false positives (columns npos onward are null)
                fp = np.sum(kn.stats[npos:] >= tr)

                # Observed FDR; guard against dividing by zero selections
                fdr += fp / max(cp, 1)

                # Proportion of true positives that are detected
                power += np.mean(kn.stats[0:npos] >= tr)

                # The estimated FDR may never exceed the target FDR
                estimated_fdr = (np.sum(kn.stats <= -tr) /
                                 (1 + np.sum(kn.stats >= tr)))
                assert_array_equal(estimated_fdr < target_fdr, True)

            power /= nrep
            fdr /= nrep

            # Check for reasonable power
            assert_array_equal(power > 0.6, True)

            # Check that we are close to the target FDR
            assert_array_equal(fdr < target_fdr + 0.05, True)
Exemplo n.º 2
0
def test_sim():
    # Simulation check of the knockoff approach: for every design
    # method and effect tester, the realized FDR and power averaged
    # over the replications are compared against fixed bounds.

    np.random.seed(43234)

    # Number of variables with a non-zero coefficient
    npos = 30

    # Aim to control FDR to this level
    target_fdr = 0.2

    # Number of simulation replications per configuration
    nrep = 10

    # Each entry is [tester, n, p, effect size]
    testers = [[kr.CorrelationEffects(), 300, 100, 6],
               [kr.ForwardEffects(pursuit=False), 300, 100, 3.5],
               [kr.ForwardEffects(pursuit=True), 300, 100, 3.5],
               [kr.OLSEffects(), 3000, 200, 3.5]]

    for method in "equi", "sdp":

        # Skip the SDP design when has_cvxopt is false
        if method == "sdp" and not has_cvxopt:
            continue

        for tester, n, p, es in testers:

            fdr = 0
            power = 0

            for _ in range(nrep):

                # Generate the predictors, scaled to unit-norm columns
                x = np.random.normal(size=(n, p))
                x /= np.sqrt(np.sum(x * x, 0))

                # Only the first npos columns contribute to the response,
                # with coefficients of alternating sign
                coeff = es * (-1)**np.arange(npos)
                y = np.dot(x[:, 0:npos], coeff) + np.random.normal(size=n)

                # Forward the design method; without this the loop over
                # `method` would only repeat the default design.
                kn = RegressionFDR(y, x, tester, design_method=method)

                # Threshold to achieve the target FDR
                tr = kn.threshold(target_fdr)

                # Number of selected coefficients
                cp = np.sum(kn.stats >= tr)

                # Number of false positives (columns npos onward are null)
                fp = np.sum(kn.stats[npos:] >= tr)

                # Observed FDR; guard against dividing by zero selections
                fdr += fp / max(cp, 1)

                # Proportion of true positives that are detected
                power += np.mean(kn.stats[0:npos] >= tr)

                # The estimated FDR may never exceed the target FDR
                estimated_fdr = (np.sum(kn.stats <= -tr) /
                                 (1 + np.sum(kn.stats >= tr)))
                assert_array_equal(estimated_fdr < target_fdr, True)

            power /= nrep
            fdr /= nrep

            # Check for reasonable power
            assert_array_equal(power > 0.6, True)

            # Check that we are close to the target FDR
            assert_array_equal(fdr < target_fdr + 0.05, True)
Exemplo n.º 3
0
def test_testers(p, tester, method):
    # Smoke test: fit RegressionFDR to pure-noise data with p columns
    # and check that `stats` and `fdr` each have p entries.

    sdp_unavailable = (method == "sdp") and (not has_cvxopt)
    if sdp_unavailable:
        return

    np.random.seed(2432)
    nobs = 200

    # The response is drawn before the design matrix
    response = np.random.normal(size=nobs)
    design = np.random.normal(size=(nobs, p))

    fit = RegressionFDR(response, design, tester, design_method=method)

    for attr_name in ("stats", "fdr"):
        assert_equal(len(getattr(fit, attr_name)), p)

    # Only verifies that the summary can be produced without error
    fit.summary()
Exemplo n.º 4
0
def test_testers(p, tester, method):
    # Smoke test: fit RegressionFDR to pure-noise data with p columns
    # and check that `stats` and `fdr` each have p entries.

    sdp_unavailable = (method == "sdp") and (not has_cvxopt)
    if sdp_unavailable:
        return

    np.random.seed(2432)
    nobs = 200

    # The response is drawn before the design matrix
    response = np.random.normal(size=nobs)
    design = np.random.normal(size=(nobs, p))

    fit = RegressionFDR(response, design, tester, design_method=method)

    for attr_name in ("stats", "fdr"):
        assert_equal(len(getattr(fit, attr_name)), p)

    # Only verifies that the summary can be produced without error
    fit.summary()
Exemplo n.º 5
0
def test_testers():
    # Smoke test: construct RegressionFDR for every combination of
    # design method and effect tester on a single pure-noise data set.

    np.random.seed(2432)

    nobs, nvar = 200, 50

    response = np.random.normal(size=nobs)
    design = np.random.normal(size=(nobs, nvar))

    effect_testers = [
        kr.CorrelationEffects(),
        kr.ForwardEffects(pursuit=False),
        kr.ForwardEffects(pursuit=True),
        kr.OLSEffects()
    ]

    for design_method in ("equi", "sdp"):

        # Run everything except the SDP design when has_cvxopt is false
        if design_method != "sdp" or has_cvxopt:
            for effect_tester in effect_testers:
                RegressionFDR(response, design, effect_tester,
                              design_method=design_method)
Exemplo n.º 6
0
def test_sim(method, tester, n, p, es):
    # This function assesses the performance of the knockoff approach
    # relative to its theoretical claims.
    #
    # method : design method name, forwarded as `design_method`
    # tester : effect tester passed to RegressionFDR
    # n, p   : number of rows and columns of the simulated design
    # es     : magnitude of the non-zero coefficients

    # Nothing to check for the SDP design when has_cvxopt is false
    if method == "sdp" and not has_cvxopt:
        return

    np.random.seed(43234)

    # Number of variables with a non-zero coefficient
    npos = 30

    # Aim to control FDR to this level
    target_fdr = 0.2

    # Number of simulation replications
    nrep = 10

    fdr, power = 0, 0
    for _ in range(nrep):

        # Generate the predictors, scaled to unit-norm columns
        x = np.random.normal(size=(n, p))
        x /= np.sqrt(np.sum(x * x, 0))

        # Generate the response variable; only the first npos columns
        # contribute, with coefficients of alternating sign
        coeff = es * (-1)**np.arange(npos)
        y = np.dot(x[:, 0:npos], coeff) + np.random.normal(size=n)

        # Forward the design method; otherwise every value of `method`
        # would exercise the same default design.
        kn = RegressionFDR(y, x, tester, design_method=method)

        # Threshold to achieve the target FDR
        tr = kn.threshold(target_fdr)

        # Number of selected coefficients
        cp = np.sum(kn.stats >= tr)

        # Number of false positives (columns npos onward are null)
        fp = np.sum(kn.stats[npos:] >= tr)

        # Observed FDR; guard against dividing by zero selections
        fdr += fp / max(cp, 1)

        # Proportion of true positives that are detected
        power += np.mean(kn.stats[0:npos] >= tr)

        # The estimated FDR may never exceed the target FDR
        estimated_fdr = (np.sum(kn.stats <= -tr) /
                         (1 + np.sum(kn.stats >= tr)))
        assert_equal(estimated_fdr < target_fdr, True)

    power /= nrep
    fdr /= nrep

    # Check for reasonable power
    assert_array_equal(power > 0.6, True)

    # Check that we are close to the target FDR
    assert_array_equal(fdr < target_fdr + 0.05, True)
Exemplo n.º 7
0
def test_sim(method, tester, n, p, es):
    # This function assesses the performance of the knockoff approach
    # relative to its theoretical claims.
    #
    # method : design method name, forwarded as `design_method`
    # tester : effect tester passed to RegressionFDR
    # n, p   : number of rows and columns of the simulated design
    # es     : magnitude of the non-zero coefficients

    # Nothing to check for the SDP design when has_cvxopt is false
    if method == "sdp" and not has_cvxopt:
        return

    np.random.seed(43234)

    # Number of variables with a non-zero coefficient
    npos = 30

    # Aim to control FDR to this level
    target_fdr = 0.2

    # Number of simulation replications
    nrep = 10

    fdr, power = 0, 0
    for _ in range(nrep):

        # Generate the predictors, scaled to unit-norm columns
        x = np.random.normal(size=(n, p))
        x /= np.sqrt(np.sum(x * x, 0))

        # Generate the response variable; only the first npos columns
        # contribute, with coefficients of alternating sign
        coeff = es * (-1)**np.arange(npos)
        y = np.dot(x[:, 0:npos], coeff) + np.random.normal(size=n)

        # Forward the design method; otherwise every value of `method`
        # would exercise the same default design.
        kn = RegressionFDR(y, x, tester, design_method=method)

        # Threshold to achieve the target FDR
        tr = kn.threshold(target_fdr)

        # Number of selected coefficients
        cp = np.sum(kn.stats >= tr)

        # Number of false positives (columns npos onward are null)
        fp = np.sum(kn.stats[npos:] >= tr)

        # Observed FDR; guard against dividing by zero selections
        fdr += fp / max(cp, 1)

        # Proportion of true positives that are detected
        power += np.mean(kn.stats[0:npos] >= tr)

        # The estimated FDR may never exceed the target FDR
        estimated_fdr = (np.sum(kn.stats <= -tr) /
                         (1 + np.sum(kn.stats >= tr)))
        assert_equal(estimated_fdr < target_fdr, True)

    power /= nrep
    fdr /= nrep

    # Check for reasonable power
    assert_array_equal(power > 0.6, True)

    # Check that we are close to the target FDR
    assert_array_equal(fdr < target_fdr + 0.05, True)