def test_subset(k=10):
    """Smoke-test forward stepwise restricted to a subset of cases.

    Simulates a 100 x 200 Gaussian design plus pure-noise response, runs
    ``k`` steps of ``forward_stepwise`` with the last 10 cases excluded via
    the ``subset`` mask, and prints the pivots of the 3rd selected model.
    The run is then repeated without passing ``covariance``.

    Parameters
    ----------
    k : int
        Number of forward stepwise steps to take before computing pivots.
        ``FS.variables[2]`` is indexed, so at least 3 steps are needed.

    Notes
    -----
    Output is printed, nothing is returned or asserted.  ``print`` is
    always called with a single parenthesised argument so the function
    parses and prints identically under Python 2 and Python 3.
    """
    n, p = 100, 200

    # Independent Gaussian entries plus a per-row shift shared by all
    # columns; columns are then scaled to standard deviation 1/sqrt(n).
    X = (np.random.standard_normal((n, p)) +
         0.4 * np.random.standard_normal(n)[:, None])
    X /= (X.std(0)[None, :] * np.sqrt(n))

    # Pure-noise response with standard deviation 0.5 (matches the
    # covariance 0.5**2 * I passed below).
    Y = np.random.standard_normal(n) * 0.5

    # Boolean case mask: everything but the last 10 cases.  The builtin
    # ``bool`` is used because the ``np.bool`` alias was removed from NumPy.
    subset = np.ones(n, bool)
    subset[-10:] = False

    FS = forward_stepwise(X, Y, subset=subset,
                          covariance=0.5**2 * np.identity(n))
    for _ in range(k):
        FS.next()

    selected_msg = 'first %s variables selected' % k
    print('%s %s' % (selected_msg, FS.variables))

    print('pivots for last variable of 3rd selected model knowing that we performed %d steps of forward stepwise' % k)

    print(FS.model_pivots(3, saturated=True))
    print(FS.model_pivots(3, saturated=False, which_var=[FS.variables[2]],
                          burnin=5000, ndraw=5000))

    # Same experiment without an explicit covariance
    # (presumably the unknown-variance code path -- TODO confirm).
    FS = forward_stepwise(X, Y, subset=subset)
    for _ in range(k):
        FS.next()

    print(FS.model_pivots(3, saturated=False, which_var=[FS.variables[2]],
                          burnin=5000, ndraw=5000))
def test_full_pvals(n=100, p=40, rho=0.3, snr=4):
    """Run a full forward stepwise path and collect three p-values per step.

    Data come from ``instance(n=n, p=p, snr=snr, rho=rho)``.  After every
    step, for the variable selected most recently, the function records

    * the selected-model p-value (``saturated=False``, sampled with
      ``burnin=2000`` and ``ndraw=8000``),
    * the saturated-model p-value (``saturated=True``),
    * a nominal two-sided normal p-value from the least squares
      coefficient of that variable, ignoring selection.

    Parameters
    ----------
    n, p, rho, snr
        Forwarded unchanged to ``instance``; ``min(n, p)`` steps are taken.

    Returns
    -------
    tuple
        ``(X, y, beta, active, sigma, pvals, completion_index)``.
        ``pvals`` is an array with one row per step:
        ``(variable, pval_select, pval_saturated, pval_nominal)``.
        ``completion_index`` is the 1-based step at which every element of
        ``active`` had been selected, or ``None`` if that never happened
        (previously this case raised ``UnboundLocalError`` at ``return``).
    """
    from scipy.stats import norm as ndist

    X, y, beta, active, sigma = instance(n=n, p=p, snr=snr, rho=rho)
    FS = forward_stepwise(X, y, covariance=sigma**2 * np.identity(n))

    pval = []
    # Track selected variables incrementally instead of rebuilding
    # np.array(pval) on every iteration.
    active_set = set(active)
    selected = set()
    completion_index = None

    for i in range(min(n, p)):
        FS.next()
        var_select, pval_select = FS.model_pivots(i+1, alternative='twosided',
                                                  which_var=[FS.variables[-1]],
                                                  saturated=False,
                                                  burnin=2000,
                                                  ndraw=8000)[0]
        pval_saturated = FS.model_pivots(i+1, alternative='twosided',
                                         which_var=[FS.variables[-1]],
                                         saturated=True)[0][1]

        # now, nominal ones

        # The last row of the pseudo-inverse maps Y to the least squares
        # coefficient of the most recently selected variable; dividing by
        # (row norm * sigma) standardises it.
        LSfunc = np.linalg.pinv(FS.X[:,FS.variables])
        Z = np.dot(LSfunc[-1], FS.Y) / (np.linalg.norm(LSfunc[-1]) * sigma)
        pval_nominal = 2 * ndist.sf(np.fabs(Z))
        pval.append((var_select, pval_select, pval_saturated, pval_nominal))

        selected.add(var_select)
        if completion_index is None and active_set.issubset(selected):
            completion_index = i + 1

    return X, y, beta, active, sigma, np.array(pval), completion_index
def test_FS(k=10):
    """Smoke-test forward stepwise with a known noise covariance.

    Simulates a 100 x 200 Gaussian design plus pure-noise response, runs
    ``k`` steps of ``forward_stepwise`` with covariance ``0.5**2 * I``, and
    prints the selected variables along with the results of
    ``model_pivots`` and ``model_quadratic`` for the 3rd selected model.

    Parameters
    ----------
    k : int
        Number of forward stepwise steps to take before computing pivots.
        ``FS.variables[2]`` is indexed, so at least 3 steps are needed.

    Notes
    -----
    Output is printed, nothing is returned or asserted.  ``print`` is
    always called with a single parenthesised argument so the function
    parses and prints identically under Python 2 and Python 3.
    """
    n, p = 100, 200

    # Independent Gaussian entries plus a per-row shift shared by all
    # columns; columns are then scaled to standard deviation 1/sqrt(n).
    X = (np.random.standard_normal((n, p)) +
         0.4 * np.random.standard_normal(n)[:, None])
    X /= (X.std(0)[None, :] * np.sqrt(n))

    # Pure-noise response with standard deviation 0.5.
    Y = np.random.standard_normal(n) * 0.5

    FS = forward_stepwise(X, Y, covariance=0.5**2 * np.identity(n))
    for _ in range(k):
        FS.next()

    selected_msg = 'first %s variables selected' % k
    print('%s %s' % (selected_msg, FS.variables))

    print('pivots for 3rd selected model knowing that we performed %d steps of forward stepwise' % k)

    print(FS.model_pivots(3))
    print(FS.model_pivots(3,
                          saturated=False,
                          which_var=[FS.variables[2]],
                          burnin=5000,
                          ndraw=5000))
    print(FS.model_quadratic(3))
def test_full_pvals(n=100, p=40, rho=0.3, snr=4):
    """Trace a complete forward stepwise path, recording p-values per step.

    A problem is generated with ``instance(n=n, p=p, snr=snr, rho=rho)``
    and ``min(n, p)`` steps are taken.  After each step three two-sided
    p-values are computed for the variable that was just added: the
    selected-model one (``saturated=False``, ``burnin=2000``,
    ``ndraw=8000``), the saturated-model one, and a nominal normal
    p-value that ignores selection.

    Parameters
    ----------
    n, p, rho, snr
        Passed straight through to ``instance``.

    Returns
    -------
    tuple
        ``(X, y, beta, active, sigma, pvals, completion_index)`` where
        ``pvals`` has one row ``(variable, pval_select, pval_saturated,
        pval_nominal)`` per step and ``completion_index`` is the 1-based
        step at which all of ``active`` had been selected.  It is ``None``
        when the path ends before that happens; the earlier code raised
        ``UnboundLocalError`` in that situation.
    """
    from scipy.stats import norm as ndist

    X, y, beta, active, sigma = instance(n=n, p=p, snr=snr, rho=rho)
    FS = forward_stepwise(X, y, covariance=sigma**2 * np.identity(n))

    pval = []
    # Variables still waiting to be selected are compared against a
    # running set, avoiding an array rebuild on every iteration.
    needed = set(active)
    seen = set()
    completion_index = None

    for i in range(min(n, p)):
        FS.next()
        newest = [FS.variables[-1]]
        var_select, pval_select = FS.model_pivots(i + 1,
                                                  alternative='twosided',
                                                  which_var=newest,
                                                  saturated=False,
                                                  burnin=2000,
                                                  ndraw=8000)[0]
        pval_saturated = FS.model_pivots(i + 1,
                                         alternative='twosided',
                                         which_var=newest,
                                         saturated=True)[0][1]

        # now, nominal ones

        # Last pseudo-inverse row <-> least squares coefficient of the
        # newest variable; standardise by (row norm * sigma).
        LSfunc = np.linalg.pinv(FS.X[:, FS.variables])
        Z = np.dot(LSfunc[-1], FS.Y) / (np.linalg.norm(LSfunc[-1]) * sigma)
        pval_nominal = 2 * ndist.sf(np.fabs(Z))
        pval.append((var_select, pval_select, pval_saturated, pval_nominal))

        seen.add(var_select)
        if completion_index is None and needed.issubset(seen):
            completion_index = i + 1

    return X, y, beta, active, sigma, np.array(pval), completion_index
def test_subset(k=10):
    """Exercise forward stepwise when only a subset of cases is used.

    Builds a 100 x 200 Gaussian design and a pure-noise response, masks
    out the last 10 cases through ``subset``, takes ``k`` forward stepwise
    steps and prints pivots for the 3rd selected model.  The same run is
    then repeated without supplying ``covariance``.

    Parameters
    ----------
    k : int
        Number of forward stepwise steps to take before computing pivots.
        ``FS.variables[2]`` is indexed, so at least 3 steps are needed.

    Notes
    -----
    Results are printed only.  Every ``print`` receives one parenthesised
    argument, which parses and prints the same on Python 2 and Python 3.
    """
    n, p = 100, 200

    # Gaussian entries plus a row-wise shift common to all columns, then
    # each column rescaled to standard deviation 1/sqrt(n).
    X = np.random.standard_normal(
        (n, p)) + 0.4 * np.random.standard_normal(n)[:, None]
    X /= (X.std(0)[None, :] * np.sqrt(n))

    # Noise-only response, standard deviation 0.5.
    Y = np.random.standard_normal(n) * 0.5

    # Builtin ``bool`` replaces ``np.bool``, an alias NumPy has removed.
    subset = np.ones(n, bool)
    subset[-10:] = False

    FS = forward_stepwise(X,
                          Y,
                          subset=subset,
                          covariance=0.5**2 * np.identity(n))

    for _ in range(k):
        FS.next()

    selected_msg = 'first %s variables selected' % k
    print('%s %s' % (selected_msg, FS.variables))

    print('pivots for last variable of 3rd selected model knowing that we performed %d steps of forward stepwise' % k)

    print(FS.model_pivots(3, saturated=True))
    print(FS.model_pivots(3,
                          saturated=False,
                          which_var=[FS.variables[2]],
                          burnin=5000,
                          ndraw=5000))

    # Repeat with no covariance given
    # (presumably the unknown-variance code path -- TODO confirm).
    FS = forward_stepwise(X, Y, subset=subset)

    for _ in range(k):
        FS.next()
    print(FS.model_pivots(3,
                          saturated=False,
                          which_var=[FS.variables[2]],
                          burnin=5000,
                          ndraw=5000))
def simulate_null(saturated=True):
    """Draw one pure-noise dataset and return pivots for the 3rd model.

    A 100 x 40 Gaussian design and a noise-only response (standard
    deviation 0.5) are simulated, five forward stepwise steps are taken,
    and ``model_pivots(3, ...)`` is evaluated with ``use_new=False``.

    Parameters
    ----------
    saturated : bool
        Forwarded to ``model_pivots``.

    Returns
    -------
    list
        The last entry of every item returned by ``model_pivots``.
    """
    n, p = 100, 40
    noise_sd = 0.5

    # Draw order matters for reproducibility under a fixed seed:
    # the (n, p) matrix first, then the shared per-row shift.
    design = np.random.standard_normal((n, p))
    row_shift = np.random.standard_normal(n)
    X = design + 0.4 * row_shift[:, None]
    X /= (X.std(0)[None, :] * np.sqrt(n))

    Y = np.random.standard_normal(n) * noise_sd

    FS = forward_stepwise(X, Y, covariance=noise_sd**2 * np.identity(n))

    n_steps = 5
    for _ in range(n_steps):
        FS.next()

    pivots = FS.model_pivots(3, saturated=saturated, use_new=False)
    return [entry[-1] for entry in pivots]
def simulate_null(saturated=True):
    """Simulate a null (noise-only) problem and collect 3rd-model pivots.

    Generates a 100 x 40 Gaussian design with a noise response of standard
    deviation 0.5, advances ``forward_stepwise`` five times, then calls
    ``model_pivots(3, saturated=saturated, use_new=False)``.

    Parameters
    ----------
    saturated : bool
        Passed on to ``model_pivots``.

    Returns
    -------
    list
        ``item[-1]`` for each item produced by ``model_pivots``.
    """
    n, p = 100, 40

    # The matrix draw precedes the row-shift draw, keeping the random
    # stream in the same order as a one-line expression would.
    base = np.random.standard_normal((n, p))
    shift = 0.4 * np.random.standard_normal(n)[:, None]
    X = base + shift
    column_scale = X.std(0)[None, :] * np.sqrt(n)
    X /= column_scale

    Y = np.random.standard_normal(n) * 0.5
    noise_cov = 0.5**2 * np.identity(n)

    FS = forward_stepwise(X, Y, covariance=noise_cov)

    for _ in range(5):
        FS.next()

    results = []
    for item in FS.model_pivots(3, saturated=saturated, use_new=False):
        results.append(item[-1])
    return results
def test_FS_unknown(k=10):
    """Smoke-test forward stepwise when no covariance is supplied.

    Simulates a 100 x 200 Gaussian design plus pure-noise response, runs
    ``k`` steps of ``forward_stepwise(X, Y)`` without a ``covariance``
    argument (presumably the unknown-variance code path -- TODO confirm),
    and prints the selected-model pivot for the 3rd selected variable.

    Parameters
    ----------
    k : int
        Number of forward stepwise steps to take before computing pivots.
        ``FS.variables[2]`` is indexed, so at least 3 steps are needed.

    Notes
    -----
    Output is printed, nothing is returned or asserted.  ``print`` is
    always called with a single parenthesised argument so the function
    parses and prints identically under Python 2 and Python 3.
    """
    n, p = 100, 200

    # Independent Gaussian entries plus a per-row shift shared by all
    # columns; columns are then scaled to standard deviation 1/sqrt(n).
    X = (np.random.standard_normal((n, p)) +
         0.4 * np.random.standard_normal(n)[:, None])
    X /= (X.std(0)[None, :] * np.sqrt(n))

    # Pure-noise response with standard deviation 0.5.
    Y = np.random.standard_normal(n) * 0.5

    FS = forward_stepwise(X, Y)
    for _ in range(k):
        FS.next()

    selected_msg = 'first %s variables selected' % k
    print('%s %s' % (selected_msg, FS.variables))

    print('pivots for last variable of 3rd selected model knowing that we performed %d steps of forward stepwise' % k)

    print(FS.model_pivots(3, saturated=False, which_var=[FS.variables[2]],
                          burnin=5000, ndraw=5000))