示例#1
0
def test_logistic():
    """
    Smoke-test ``lasso.logistic`` on small random binomial problems.

    Three response/trials configurations are tried, each with a fresh
    10 x 5 standard normal design: binary responses with a vector of ones
    as ``trials``, binary responses with ``trials=None``, and
    Binomial(3, 0.5) counts with ``trials`` equal to 3 everywhere.  For
    every configuration the test asserts that
    ``dot(constraints.linear_part, onestep_estimator)`` is elementwise
    smaller than ``constraints.offset``.

    Returns
    -------
    L, C, P
        The fitted object, its ``constraints`` and the ``'pval'`` entry of
        its ``summary()``, taken from the last configuration.
    """
    cases = [(np.random.binomial(1, 0.5, size=(10,)), np.ones(10)),
             (np.random.binomial(1, 0.5, size=(10,)), None),
             (np.random.binomial(3, 0.5, size=(10,)), 3 * np.ones(10))]

    for Y, T in cases:
        X = np.random.standard_normal((10, 5))

        L = lasso.logistic(X, Y, 0.1, trials=T)
        L.fit()

        C = L.constraints

        np.testing.assert_array_less(
            np.dot(C.linear_part, L.onestep_estimator),
            C.offset)

        P = L.summary()['pval']

    # Return only once the loop is finished: returning from inside the loop
    # would stop after the first configuration and leave the other two
    # untested.
    return L, C, P
示例#2
0
def test_logistic():
    """
    Fit ``lasso.logistic`` on tiny random problems and check its constraints.

    The loop covers three (response, trials) pairs: Bernoulli responses
    with ``trials`` given as ones, Bernoulli responses with ``trials=None``
    and Binomial(3, 0.5) responses with three trials per observation.  A
    new 10 x 5 Gaussian design is drawn for each pair.  After fitting, the
    product of ``constraints.linear_part`` with ``onestep_estimator`` must
    be elementwise less than ``constraints.offset``.

    Returns
    -------
    L, C, P
        Fitted object, its constraints, and ``summary()['pval']`` for the
        final pair in the list.
    """
    configurations = [
        (np.random.binomial(1, 0.5, size=(10,)), np.ones(10)),
        (np.random.binomial(1, 0.5, size=(10,)), None),
        (np.random.binomial(3, 0.5, size=(10,)), 3 * np.ones(10)),
    ]

    for Y, T in configurations:
        X = np.random.standard_normal((10, 5))

        L = lasso.logistic(X, Y, 0.1, trials=T)
        L.fit()

        C = L.constraints
        lhs = np.dot(C.linear_part, L.onestep_estimator)
        np.testing.assert_array_less(lhs, C.offset)

        P = L.summary()['pval']

    # The return statement lives outside the loop so that every
    # configuration is exercised, not just the first one.
    return L, C, P
示例#3
0
def test_logistic_pvals(n=500, p=200, s=3, sigma=2, rho=0.3, snr=7.):
    """
    Collect one-sided p-values of the non-true variables after a logistic fit.

    Data come from ``instance(n, p, s, sigma, rho, snr)``; the response is
    binarized as ``y > 0`` and a column of ones is prepended to the design.
    Because of that extra column the active indices returned by
    ``instance`` are shifted by one and index 0 is added to them.  The
    first column gets penalty weight 0 and the remaining ``p`` columns
    weight 1.2.

    Returns
    -------
    list
        If every index of the shifted active set appears in ``L.active``,
        the ``'pval'`` entries of ``L.summary('onesided')`` whose
        ``'variable'`` is not in that set; otherwise an empty list.
    """
    X, y, _, active, _ = instance(n=n,
                                  p=p,
                                  s=s,
                                  sigma=sigma,
                                  rho=rho,
                                  snr=snr)

    z = (y > 0)
    X = np.hstack([np.ones((n, 1)), X])

    # Shift indices to account for the prepended column of ones, then add
    # that column (index 0) itself.
    shifted = np.array(active)
    shifted += 1
    truth = set([0] + list(shifted))

    L = lasso.logistic(X, z, [0] * 1 + [1.2] * p)
    L.fit()
    S = L.summary('onesided')

    if not truth.issubset(L.active):
        return []

    return [pval for pval, var in zip(S['pval'], S['variable'])
            if var not in truth]
示例#4
0
def test_logistic_pvals(n=500,
                        p=200,
                        s=3,
                        sigma=2,
                        rho=0.3,
                        snr=7.):
    """
    Return the one-sided p-values of variables outside the true active set.

    A dataset is drawn with ``instance``, the response is thresholded at
    zero, and a column of ones is stacked in front of the design.  The
    active indices from ``instance`` are therefore incremented by one and
    index 0 is prepended.  ``lasso.logistic`` is fitted with weight 0 on
    the first column and 1.2 on each of the other ``p`` columns.

    Returns
    -------
    list
        The ``'pval'`` values of ``L.summary('onesided')`` for variables
        not in the shifted active set when that whole set is contained in
        ``L.active``; an empty list when it is not.
    """
    X, y, _, active, _ = instance(n=n, p=p, s=s, sigma=sigma, rho=rho, snr=snr)

    z = (y > 0)
    X = np.hstack([np.ones((n, 1)), X])

    # Indices move up by one because of the extra leading column; index 0
    # is that column.
    active = np.array(active)
    active += 1
    active = set([0] + list(active))

    L = lasso.logistic(X, z, [0]*1 + [1.2]*p)
    L.fit()
    S = L.summary('onesided')

    if active.issubset(L.active):
        return [pval
                for pval, variable in zip(S['pval'], S['variable'])
                if variable not in active]
    return []
示例#5
0
def test_logistic_pvals(n=500,
                        p=200,
                        s=3,
                        sigma=2,
                        rho=0.3,
                        snr=7.):
    """
    Return one-sided p-values together with truth flags for each variable.

    Data are drawn with ``instance``, the response is binarized as
    ``y > 0`` and a column of ones is prepended to the design.  The active
    indices returned by ``instance`` are shifted by one to match the
    widened design and index 0 (the added column) is included.
    ``lasso.logistic`` is fitted with weight 0 on the first column and 1.2
    on the other ``p`` columns.

    Returns
    -------
    tuple or None
        ``(S['pval'], flags)`` where ``S = L.summary('onesided')`` and
        ``flags[i]`` tells whether ``S['variable'][i]`` is in the shifted
        true active set.  ``None`` when that set is not contained in
        ``L.active``.
    """
    X, y, beta, true_active, sigma = instance(n=n,
                                              p=p,
                                              s=s,
                                              sigma=sigma,
                                              rho=rho,
                                              snr=snr)

    z = (y > 0)
    X = np.hstack([np.ones((n, 1)), X])

    shifted = np.array(true_active)
    shifted += 1
    # The shifted index list is itself the true active set.  Passing it to
    # np.nonzero would return positions inside this list, not column
    # indices of X.
    # NOTE(review): index 0 is kept in the set, matching the other variants
    # of this test in the file -- confirm that is the intended convention.
    true_active = [0] + list(shifted)

    L = lasso.logistic(X, z, [0]*1 + [1.2]*p)
    L.fit()
    S = L.summary('onesided')

    if set(true_active).issubset(L.active):
        return S['pval'], [v in true_active for v in S['variable']]
    return None
示例#6
0
def test_logistic_pvals(n=500,
                        p=200,
                        s=3,
                        rho=0.3,
                        signal=15.):
    """
    Fit a logistic lasso on simulated data and report one-sided p-values.

    The data come from ``logistic_instance`` (called with
    ``equicorrelated=False``).  A column of ones is prepended to the
    design, so the returned active indices are shifted by one and index 0
    is added in front.  The first column is given penalty weight 0 and the
    other ``p`` columns weight 1.2.  The raw and the shifted active sets
    are printed along the way.

    Returns
    -------
    tuple or None
        ``(S['pval'], flags)`` with ``S = L.summary('onesided')`` and
        ``flags[i]`` indicating whether ``S['variable'][i]`` belongs to the
        shifted active set; ``None`` if that set is not a subset of
        ``L.active``.
    """
    design, y, beta, true_active = logistic_instance(
        n=n, p=p, s=s, rho=rho, signal=signal, equicorrelated=False)

    X = np.hstack([np.ones((n, 1)), design])

    print(true_active, 'true')

    # Account for the prepended column: shift by one, then add index 0.
    shifted = np.array(true_active)
    shifted += 1
    true_active = [0] + list(shifted)

    weights = [0]*1 + [1.2]*p
    L = lasso.logistic(X, y, weights)
    L.fit()
    S = L.summary('onesided')

    print(true_active, L.active)

    if not set(true_active).issubset(L.active):
        return None

    return S['pval'], [v in true_active for v in S['variable']]
示例#7
0
def test_logistic():
    """
    Check that logistic results agree with R.

    An R script simulates data, fits glmnet and runs ``fixedLassoInf``;
    the same design (with a column of ones prepended) and response are
    then given to ``lasso.logistic`` with weight 0 on the first column and
    0.8 on the others.

    Yields
    ------
    tuple
        ``(assertion, *args)`` triples/tuples comparing, in order: the
        selected variables, the fitted coefficients on the active set, and
        the one-sided p-values (first entry dropped on the Python side).
    """
    # NOTE(review): yield-style tests are a nose convention; pytest >= 4 no
    # longer runs them -- confirm which runner collects this file.
    tol = 1.e-4
    r_script = """
    library(selectiveInference)
    set.seed(43)
    n = 50
    p = 10
    sigma = 10

    x = matrix(rnorm(n*p),n,p)
    x=scale(x,TRUE,TRUE)

    beta = c(3,2,rep(0,p-2))
    y = x %*% beta + sigma * rnorm(n)
    y=1*(y>mean(y))
    # first run glmnet
    gfit = glmnet(x,y,standardize=FALSE,family="binomial")

    # extract coef for a given lambda; note the 1/n factor!
    # (and here  we DO  include the intercept term)
    lambda = .8
    beta_hat = as.numeric(coef(gfit, s=lambda/n, exact=TRUE))

    # compute fixed lambda p-values and selection intervals
    out = fixedLassoInf(x,y,beta_hat,lambda,family="binomial")
    vlo = out$vlo
    vup = out$vup
    sdvar = out$sd
    coef=out$coef0
    info_mat=out$info.matrix
    beta_hat = beta_hat[c(1, out$vars+1)]
    out
    pval = out$pv
    vars_logit = out$var

    """

    rpy.r(r_script)
    r_pvalues = np.asarray(rpy.r('pval'))
    r_selected = np.asarray(rpy.r('vars_logit'))

    response = np.asarray(rpy.r('y')).reshape(-1)

    r_coef = np.asarray(rpy.r('as.numeric(beta_hat)'))
    design = np.asarray(rpy.r('x'))

    # Prepend a column of ones; it receives penalty weight 0 below.
    ones_column = np.ones((design.shape[0], 1))
    design = np.hstack([ones_column, design])
    weights = [0] + [0.8] * (design.shape[1] - 1)

    L = lasso.logistic(design, response, weights)
    fitted = L.fit()
    python_coef = fitted[L.active]

    yield np.testing.assert_equal, L.active[1:], r_selected
    # Trailing positional arguments of assert_allclose: rtol, atol,
    # equal_nan, err_msg.
    yield (np.testing.assert_allclose, python_coef, r_coef,
           tol, tol, False, 'logistic coef')
    yield (np.testing.assert_allclose,
           L.summary('onesided')['pval'][1:], r_pvalues,
           tol, tol, False, 'logistic pvalues')
def test_logistic():
    """
    Compare ``lasso.logistic`` against R's ``fixedLassoInf``.

    The embedded R script generates the data, fits glmnet with a binomial
    family and computes fixed-lambda inference.  The design and response
    are pulled back from R, a column of ones is stacked in front of the
    design, and ``lasso.logistic`` is fitted with weight 0 on that column
    and 0.8 on every other column.

    Yields
    ------
    tuple
        An assertion callable followed by its arguments, for: the selected
        variables, the active coefficients, and the one-sided p-values
        (the first Python entry is skipped in the first and last check).
    """
    # NOTE(review): generator-style tests rely on nose; pytest >= 4 does not
    # execute them -- verify the runner used for this file.
    tol = 1.e-4
    R_code = """
    library(selectiveInference)
    set.seed(43)
    n = 50
    p = 10
    sigma = 10

    x = matrix(rnorm(n*p),n,p)
    x=scale(x,TRUE,TRUE)

    beta = c(3,2,rep(0,p-2))
    y = x %*% beta + sigma * rnorm(n)
    y=1*(y>mean(y))
    # first run glmnet
    gfit = glmnet(x,y,standardize=FALSE,family="binomial")

    # extract coef for a given lambda; note the 1/n factor!
    # (and here  we DO  include the intercept term)
    lambda = .8
    beta_hat = as.numeric(coef(gfit, s=lambda/n, exact=TRUE))

    # compute fixed lambda p-values and selection intervals
    out = fixedLassoInf(x,y,beta_hat,lambda,family="binomial")
    vlo = out$vlo
    vup = out$vup
    sdvar = out$sd
    coef=out$coef0
    info_mat=out$info.matrix
    beta_hat = beta_hat[c(1, out$vars+1)]
    out
    pval = out$pv
    vars_logit = out$var

    """

    rpy.r(R_code)
    pvals_from_r = np.asarray(rpy.r('pval'))
    vars_from_r = np.asarray(rpy.r('vars_logit'))

    y = np.asarray(rpy.r('y'))
    y = y.reshape(-1)

    coef_from_r = np.asarray(rpy.r('as.numeric(beta_hat)'))
    raw_x = np.asarray(rpy.r('x'))

    # Widen the design with a leading column of ones (penalty weight 0).
    x = np.hstack([np.ones((raw_x.shape[0], 1)), raw_x])
    n_penalized = x.shape[1] - 1
    L = lasso.logistic(x, y, [0] + [0.8] * n_penalized)
    coef_from_python = L.fit()[L.active]

    yield np.testing.assert_equal, L.active[1:], vars_from_r
    # Positional extras for assert_allclose are rtol, atol, equal_nan and
    # err_msg, in that order.
    yield (np.testing.assert_allclose,
           coef_from_python, coef_from_r,
           tol, tol, False, 'logistic coef')
    yield (np.testing.assert_allclose,
           L.summary('onesided')['pval'][1:], pvals_from_r,
           tol, tol, False, 'logistic pvalues')
def test_logistic():
    """
    Smoke-test ``lasso.logistic`` for several response/trials settings.

    Three settings are run, each on a fresh 10 x 5 standard normal design:
    binary responses with ``trials`` set to ones, binary responses with
    ``trials=None``, and Binomial(3, 0.5) counts with ``trials`` equal to
    3.  After each fit the test asserts that
    ``dot(constraints.linear_part, _onestep)`` is elementwise smaller than
    ``constraints.offset`` and reads the ``intervals`` and
    ``active_pvalues`` attributes.

    Returns
    -------
    L, C, I, P
        Fitted object, its constraints, its ``intervals`` and its
        ``active_pvalues``, all from the last setting.
    """
    cases = [(np.random.binomial(1, 0.5, size=(10,)), np.ones(10)),
             (np.random.binomial(1, 0.5, size=(10,)), None),
             (np.random.binomial(3, 0.5, size=(10,)), 3 * np.ones(10))]

    for Y, T in cases:
        X = np.random.standard_normal((10, 5))

        L = lasso.logistic(X, Y, 0.1, trials=T)
        L.fit()
        C = L.constraints

        np.testing.assert_array_less(
            np.dot(C.linear_part, L._onestep),
            C.offset)

        I = L.intervals
        P = L.active_pvalues

    # Returning after the loop (not inside it) ensures all three settings
    # are actually exercised.
    return L, C, I, P