def test_logistic():
    """
    Smoke-test `lasso.logistic` on small random binomial problems.

    Three response/trials configurations are fitted on a fresh 10 x 5
    standard-normal design: 0/1 responses with a vector of ones as trials,
    0/1 responses with ``trials=None``, and binomial(3, 0.5) responses with
    three trials per observation.  For every fit,
    ``linear_part . onestep_estimator`` must be elementwise strictly less
    than ``offset`` for the fitted constraints, and the ``'pval'`` entry of
    the summary is read.

    Returns
    -------
    L, C, P
        The fitted lasso object, its constraints and its summary p-values,
        all taken from the last configuration.
    """
    # All response vectors are drawn before any design matrix, which fixes
    # the order in which the global random number generator is consumed.
    cases = [
        (np.random.binomial(1, 0.5, size=(10,)), np.ones(10)),
        (np.random.binomial(1, 0.5, size=(10,)), None),
        (np.random.binomial(3, 0.5, size=(10,)), 3 * np.ones(10)),
    ]

    for Y, T in cases:
        X = np.random.standard_normal((10, 5))

        L = lasso.logistic(X, Y, 0.1, trials=T)
        L.fit()

        C = L.constraints
        np.testing.assert_array_less(
            np.dot(C.linear_part, L.onestep_estimator),
            C.offset)

        P = L.summary()['pval']

    # Returning only after the loop guarantees that every configuration
    # above has actually been fitted and checked.
    return L, C, P
def test_logistic_pvals(n=500, p=200, s=3, sigma=2, rho=0.3, snr=7.):
    """
    Collect one-sided logistic-lasso p-values of the null variables.

    A problem is drawn with `instance`, its response is dichotomised at
    zero (``y > 0``), and a column of ones is prepended to the design.
    `lasso.logistic` is then fitted with feature weight 0 on that first
    column and 1.2 on each of the `p` original columns.

    A single problem is drawn per call; there is no retry when the fit
    misses a truly active variable.

    Returns
    -------
    list
        p-values, from ``summary('onesided')``, of the selected variables
        that are not truly active (column 0 counts as truly active), or
        ``[]`` when the fit did not select every truly active variable.
    """
    X, y, _, support, _ = instance(n=n, p=p, s=s, sigma=sigma, rho=rho, snr=snr)
    z = (y > 0)
    X = np.hstack([np.ones((n, 1)), X])

    # The prepended column occupies index 0, so every index reported by
    # `instance` moves up by one; index 0 itself is treated as active.
    truly_active = {0}
    truly_active.update(np.asarray(support) + 1)

    L = lasso.logistic(X, z, [0] + [1.2] * p)
    L.fit()
    S = L.summary('onesided')

    if not truly_active.issubset(L.active):
        return []
    return [pval for pval, var in zip(S['pval'], S['variable'])
            if var not in truly_active]
def test_logistic_pvals(n=500, p=200, s=3, sigma=2, rho=0.3, snr=7.):
    """
    Return the one-sided p-values of selected-but-null variables.

    Steps: draw a problem with `instance`; turn its response into the
    boolean vector ``y > 0``; prepend a column of ones to the design; fit
    `lasso.logistic` with feature weights ``[0] + [1.2] * p``; read
    ``summary('onesided')``.

    Only one problem is drawn per call.  When the fitted active set does
    not contain every truly active column the function gives up and
    returns an empty list instead of drawing again.

    Returns
    -------
    list
        The ``'pval'`` entries whose ``'variable'`` is not truly active
        (the prepended column 0 is counted as truly active), or ``[]``.
    """
    X, y, _, true_support, _ = instance(n=n, p=p, s=s, sigma=sigma,
                                        rho=rho, snr=snr)
    z = (y > 0)
    X = np.hstack([np.ones((n, 1)), X])

    # Indices from `instance` refer to the design before the column of
    # ones was added, hence the shift by one; 0 stands for the new column.
    active = {0} | set(np.asarray(true_support) + 1)

    L = lasso.logistic(X, z, [0] + [1.2] * p)
    L.fit()
    S = L.summary('onesided')

    if not active.issubset(L.active):
        return []

    null_pvalues = []
    for pvalue, variable in zip(S['pval'], S['variable']):
        if variable not in active:
            null_pvalues.append(pvalue)
    return null_pvalues
def test_logistic_pvals(n=500, p=200, s=3, sigma=2, rho=0.3, snr=7.):
    """
    Fit a logistic lasso once and label its p-values as signal or null.

    A problem is drawn with `instance`, the response is dichotomised as
    ``y > 0``, a column of ones is prepended to the design, and
    `lasso.logistic` is fitted with feature weights ``[0] + [1.2] * p``.

    Returns
    -------
    (pvalues, is_active) or None
        ``pvalues`` is the ``'pval'`` entry of ``summary('onesided')`` and
        ``is_active`` is a list with one boolean per summarised variable,
        true when that variable is one of the truly active columns (the
        prepended column 0 is labelled false).  ``None`` is returned when
        the fit did not select every truly active column.
    """
    X, y, _, support, _ = instance(n=n, p=p, s=s, sigma=sigma, rho=rho, snr=snr)
    z = (y > 0)
    X = np.hstack([np.ones((n, 1)), X])

    # Indices from `instance` refer to the design before the column of ones
    # was added, so each moves up by one.  The shifted indices are used
    # directly: taking np.nonzero of the index list would yield positions
    # 1..s, which only coincide with the true columns when the support is
    # the first s columns.
    true_active = set(np.asarray(support) + 1)

    L = lasso.logistic(X, z, [0] + [1.2] * p)
    L.fit()
    S = L.summary('onesided')

    if not true_active.issubset(L.active):
        return None
    return S['pval'], [v in true_active for v in S['variable']]
def test_logistic_pvals(n=500, p=200, s=3, rho=0.3, signal=15.):
    """
    Fit a logistic lasso once and label its p-values as signal or null.

    A problem is drawn with `logistic_instance` (``equicorrelated=False``),
    a column of ones is prepended to the design, and `lasso.logistic` is
    fitted with feature weight 0 on that column and 1.2 on each of the `p`
    original columns.  The true support is printed before and after it is
    re-indexed for the augmented design.

    Returns
    -------
    (pvalues, is_active) or None
        ``pvalues`` is the ``'pval'`` entry of ``summary('onesided')`` and
        ``is_active`` holds one boolean per summarised variable, true when
        the variable is truly active (column 0 included).  ``None`` is
        returned when the fit missed a truly active column.
    """
    design, y, _beta, true_active = logistic_instance(
        n=n, p=p, s=s, rho=rho, signal=signal, equicorrelated=False)
    design = np.hstack([np.ones((n, 1)), design])
    print(true_active, 'true')

    # Re-index the support for the augmented design: every original index
    # moves up by one and index 0 (the column of ones) is added in front.
    shifted = np.array(true_active)
    shifted += 1
    true_active = [0] + list(shifted)

    feature_weights = [0] * 1 + [1.2] * p
    L = lasso.logistic(design, y, feature_weights)
    L.fit()
    S = L.summary('onesided')
    print(true_active, L.active)

    if not set(true_active).issubset(L.active):
        return None

    labels = [v in true_active for v in S['variable']]
    return S['pval'], labels
def test_logistic():
    """
    Check that logistic results agree with R.

    The R snippet simulates a binary-response problem, fits it with
    ``glmnet`` and runs ``fixedLassoInf`` (family ``"binomial"``) at
    ``lambda = .8``.  The simulated ``x`` and ``y`` are then pulled back
    through `rpy`, a column of ones is prepended to ``x``, and
    `lasso.logistic` is fitted with feature weight 0 on that column and
    0.8 on every other column.

    Yields
    ------
    tuple
        ``(assertion, *args)`` tuples in the generator-test style.  In
        order they compare the selected variables (first active index
        dropped), the active coefficients, and the one-sided p-values
        (first entry dropped) with their R counterparts.
    """
    tol = 1.e-4

    # Keep one R statement or comment per line: R needs a newline (or ';')
    # between statements, and a '#' comment runs to the end of its line.
    R_code = """
    library(selectiveInference)
    set.seed(43)
    n = 50
    p = 10
    sigma = 10

    x = matrix(rnorm(n*p),n,p)
    x=scale(x,TRUE,TRUE)

    beta = c(3,2,rep(0,p-2))
    y = x %*% beta + sigma * rnorm(n)
    y=1*(y>mean(y))

    # first run glmnet
    gfit = glmnet(x,y,standardize=FALSE,family="binomial")

    # extract coef for a given lambda; note the 1/n factor!
    # (and here we DO include the intercept term)
    lambda = .8
    beta_hat = as.numeric(coef(gfit, s=lambda/n, exact=TRUE))

    # compute fixed lambda p-values and selection intervals
    out = fixedLassoInf(x,y,beta_hat,lambda,family="binomial")
    vlo = out$vlo
    vup = out$vup
    sdvar = out$sd
    coef=out$coef0
    info_mat=out$info.matrix
    beta_hat = beta_hat[c(1, out$vars+1)]
    out
    pval = out$pv
    vars_logit = out$var
    """

    rpy.r(R_code)
    R_pvals = np.asarray(rpy.r('pval'))
    selected_vars = np.asarray(rpy.r('vars_logit'))

    y = np.asarray(rpy.r('y'))
    y = y.reshape(-1)
    beta_hat = np.asarray(rpy.r('as.numeric(beta_hat)'))
    x = np.asarray(rpy.r('x'))
    x = np.hstack([np.ones((x.shape[0], 1)), x])

    L = lasso.logistic(x, y, [0] + [0.8] * (x.shape[1] - 1))
    beta2 = L.fit()[L.active]

    # Generator tests can only pass positional arguments, so the trailing
    # values map onto assert_allclose's rtol, atol, equal_nan and err_msg.
    yield np.testing.assert_equal, L.active[1:], selected_vars
    yield (np.testing.assert_allclose, beta2, beta_hat,
           tol, tol, False, 'logistic coef')
    yield (np.testing.assert_allclose, L.summary('onesided')['pval'][1:],
           R_pvals, tol, tol, False, 'logistic pvalues')
def test_logistic():
    """
    Compare `lasso.logistic` with R's ``fixedLassoInf`` on one data set.

    R (driven through `rpy`) simulates the data, fits ``glmnet`` with
    ``family="binomial"`` and computes fixed-lambda inference at
    ``lambda = .8``.  Python then reads back ``x`` and ``y``, prepends a
    column of ones to ``x`` and fits `lasso.logistic` with feature weights
    ``[0] + [0.8] * (number of original columns)``.

    Yields
    ------
    tuple
        ``(assertion, *args)`` tuples, one per comparison: selected
        variables, active coefficients, one-sided p-values.  The first
        active index and the first p-value are dropped before comparing
        with R.  Numerical comparisons use ``tol`` as both the relative
        and the absolute tolerance.
    """
    tol = 1.e-4

    # The R program must stay multi-line: its statements are separated by
    # newlines and each '#' comment extends to the end of its own line.
    R_code = """
    library(selectiveInference)
    set.seed(43)
    n = 50
    p = 10
    sigma = 10

    x = matrix(rnorm(n*p),n,p)
    x=scale(x,TRUE,TRUE)

    beta = c(3,2,rep(0,p-2))
    y = x %*% beta + sigma * rnorm(n)
    y=1*(y>mean(y))

    # first run glmnet
    gfit = glmnet(x,y,standardize=FALSE,family="binomial")

    # extract coef for a given lambda; note the 1/n factor!
    # (and here we DO include the intercept term)
    lambda = .8
    beta_hat = as.numeric(coef(gfit, s=lambda/n, exact=TRUE))

    # compute fixed lambda p-values and selection intervals
    out = fixedLassoInf(x,y,beta_hat,lambda,family="binomial")
    vlo = out$vlo
    vup = out$vup
    sdvar = out$sd
    coef=out$coef0
    info_mat=out$info.matrix
    beta_hat = beta_hat[c(1, out$vars+1)]
    out
    pval = out$pv
    vars_logit = out$var
    """
    rpy.r(R_code)

    # Pull the R-side results and data back as numpy arrays.
    R_pvals = np.asarray(rpy.r('pval'))
    selected_vars = np.asarray(rpy.r('vars_logit'))
    y = np.asarray(rpy.r('y')).reshape(-1)
    beta_hat = np.asarray(rpy.r('as.numeric(beta_hat)'))
    x = np.asarray(rpy.r('x'))

    n_features = x.shape[1]
    x = np.hstack([np.ones((x.shape[0], 1)), x])

    L = lasso.logistic(x, y, [0] + [0.8] * n_features)
    soln = L.fit()
    beta2 = soln[L.active]

    # Positional extras are assert_allclose's rtol, atol, equal_nan and
    # err_msg; generator tests cannot pass them by keyword.
    yield np.testing.assert_equal, L.active[1:], selected_vars
    yield (np.testing.assert_allclose, beta2, beta_hat,
           tol, tol, False, 'logistic coef')
    yield (np.testing.assert_allclose, L.summary('onesided')['pval'][1:],
           R_pvals, tol, tol, False, 'logistic pvalues')
def test_logistic():
    """
    Smoke-test `lasso.logistic` on small random binomial problems.

    Three response/trials configurations are fitted on a fresh 10 x 5
    standard-normal design: 0/1 responses with a vector of ones as trials,
    0/1 responses with ``trials=None``, and binomial(3, 0.5) responses with
    three trials per observation.  For every fit,
    ``linear_part . _onestep`` must be elementwise strictly less than
    ``offset`` for the fitted constraints, and the ``intervals`` and
    ``active_pvalues`` attributes are read.

    Returns
    -------
    L, C, I, P
        The fitted lasso object, its constraints, its intervals and its
        active p-values, all taken from the last configuration.
    """
    # All response vectors are drawn before any design matrix, which fixes
    # the order in which the global random number generator is consumed.
    cases = [
        (np.random.binomial(1, 0.5, size=(10,)), np.ones(10)),
        (np.random.binomial(1, 0.5, size=(10,)), None),
        (np.random.binomial(3, 0.5, size=(10,)), 3 * np.ones(10)),
    ]

    for Y, T in cases:
        X = np.random.standard_normal((10, 5))

        L = lasso.logistic(X, Y, 0.1, trials=T)
        L.fit()

        C = L.constraints
        np.testing.assert_array_less(
            np.dot(C.linear_part, L._onestep),
            C.offset)

        intervals = L.intervals
        pvalues = L.active_pvalues

    # Returning only after the loop guarantees that every configuration
    # above has actually been fitted and checked.
    return L, C, intervals, pvalues