# Imports assumed by these tests: numpy, regreg, and the selectinf
# randomized-lasso machinery. The exact module paths follow the selectinf
# layout at the time of writing and may differ across versions.
import numpy as np
import regreg.api as rr

from selectinf.tests.instance import gaussian_instance
from selectinf.randomized.lasso import lasso, split_lasso, selected_targets
from selectinf.randomized.posterior_inference import posterior_inference_lasso
from selectinf.randomized.approx_reference import (approximate_normalizer_inference,
                                                   approximate_mle_inference)


def test_sampler(n=500,
                 p=100,
                 signal_fac=1.,
                 s=5,
                 sigma=3.,
                 rho=0.4,
                 randomizer_scale=1.):
    """
    Check coverage of 90% posterior credible intervals
    for the selected targets of a randomized lasso.
    """

    inst, const = gaussian_instance, lasso.gaussian
    signal = np.sqrt(signal_fac * 2 * np.log(p))

    X, Y, beta = inst(n=n,
                      p=p,
                      signal=signal,
                      s=s,
                      equicorrelated=False,
                      rho=rho,
                      sigma=sigma,
                      random_signs=True)[:3]

    n, p = X.shape

    sigma_ = np.std(Y)
    W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_

    conv = const(X,
                 Y,
                 W,
                 randomizer_scale=randomizer_scale * sigma_)

    signs = conv.fit()
    nonzero = signs != 0

    # projected (partial) targets for the selected model
    beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta))

    # dispersion estimated from the full-model residuals
    dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p)

    (observed_target,
     cov_target,
     cov_target_score,
     alternatives) = selected_targets(conv.loglike,
                                      conv._W,
                                      nonzero,
                                      dispersion=dispersion)

    A_scaling = conv.sampler.affine_con.linear_part
    b_scaling = conv.sampler.affine_con.offset
    logdens_linear = conv.sampler.logdens_transform[0]

    posterior_inf = posterior_inference_lasso(observed_target,
                                              cov_target,
                                              cov_target_score,
                                              conv.observed_opt_state,
                                              conv.cond_mean,
                                              conv.cond_cov,
                                              logdens_linear,
                                              A_scaling,
                                              b_scaling,
                                              observed_target)

    samples = posterior_inf.posterior_sampler(nsample=2000,
                                              nburnin=200,
                                              step=1.)

    # 90% equal-tailed credible intervals from the posterior samples
    lci = np.percentile(samples, 5, axis=0)
    uci = np.percentile(samples, 95, axis=0)
    coverage = (lci < beta_target) * (uci > beta_target)
    length = uci - lci
    print("check ", coverage, length)
def test_instance():
    n, p, s = 500, 100, 5
    X = np.random.standard_normal((n, p))
    beta = np.zeros(p)
    beta[:s] = np.sqrt(2 * np.log(p) / n)
    Y = X.dot(beta) + np.random.standard_normal(n)

    scale_ = np.std(Y)

    # uses noise of variance n * scale_ / 4 by default
    L = lasso.gaussian(X, Y, 3 * scale_ * np.sqrt(2 * np.log(p) * np.sqrt(n)))
    signs = L.fit()
    E = (signs != 0)

    # enlarge the selected set by forcing in the last three features
    M = E.copy()
    M[-3:] = True

    dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum())

    (observed_target,
     cov_target,
     cov_target_score,
     alternatives) = selected_targets(L.loglike,
                                      L._W,
                                      M,
                                      dispersion=dispersion)

    print("check shapes", observed_target.shape, E.sum())

    result = L.selective_MLE(observed_target,
                             cov_target,
                             cov_target_score)[0]

    estimate = result['MLE']
    pval = result['pvalue']
    intervals = np.asarray(result[['lower_confidence', 'upper_confidence']])

    beta_target = np.linalg.pinv(X[:, M]).dot(X.dot(beta))

    coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])
    print("observed_opt_state ", L.observed_opt_state)
    # print("check ", np.asarray(result['MLE']), np.asarray(result['unbiased']))
    return coverage
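
# Hypothetical driver (ours, not part of the original suite): a minimal
# sketch of how the boolean coverage vector returned by test_instance can
# be aggregated into an empirical coverage rate over repeated simulations.
def main_instance(nsim=50):
    cover = []
    for _ in range(nsim):
        cover.extend(test_instance())
        print("coverage so far ", np.mean(cover))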
def test_selected_targets(n=2000,
                          p=200,
                          signal_fac=1.,
                          s=5,
                          sigma=3,
                          rho=0.4,
                          randomizer_scale=1,
                          full_dispersion=True):
    """
    Compare to R randomized lasso
    """

    inst, const = gaussian_instance, lasso.gaussian
    signal = np.sqrt(signal_fac * 2 * np.log(p))

    while True:
        X, Y, beta = inst(n=n,
                          p=p,
                          signal=signal,
                          s=s,
                          equicorrelated=False,
                          rho=rho,
                          sigma=sigma,
                          random_signs=True)[:3]

        # AR(1) design covariance, matching equicorrelated=False above
        idx = np.arange(p)
        sigmaX = rho ** np.abs(np.subtract.outer(idx, idx))
        print("snr", beta.T.dot(sigmaX).dot(beta) / ((sigma ** 2.) * n))

        n, p = X.shape

        sigma_ = np.std(Y)
        W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_

        conv = const(X,
                     Y,
                     W,
                     randomizer_scale=randomizer_scale * sigma_)

        signs = conv.fit()
        nonzero = signs != 0
        print("dimensions", n, p, nonzero.sum())

        # retry with a fresh instance if the lasso selected nothing
        if nonzero.sum() > 0:
            dispersion = None
            if full_dispersion:
                dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p)

            (observed_target,
             cov_target,
             cov_target_score,
             alternatives) = selected_targets(conv.loglike,
                                              conv._W,
                                              nonzero,
                                              dispersion=dispersion)

            result = conv.selective_MLE(observed_target,
                                        cov_target,
                                        cov_target_score)[0]

            estimate = result['MLE']
            pval = result['pvalue']
            intervals = np.asarray(result[['lower_confidence', 'upper_confidence']])

            beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta))
            coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])

            print("observed_opt_state ", conv.observed_opt_state)
            # print("check ", np.asarray(result['MLE']), np.asarray(result['unbiased']))

            return (pval[beta[nonzero] == 0],
                    pval[beta[nonzero] != 0],
                    coverage,
                    intervals)
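
# Hypothetical check (ours, not from the original suite): under valid
# selective inference the null p-values returned by test_selected_targets
# should be roughly Uniform(0, 1); this sketch eyeballs that via their
# empirical mean (~0.5) together with the realized interval coverage.
def main_selected_targets(nsim=50):
    P0, cover = [], []
    for _ in range(nsim):
        p0, pA, coverage, intervals = test_selected_targets()
        P0.extend(p0)
        cover.extend(coverage)
        print("null p-value mean %0.3f, coverage %0.3f" %
              (np.mean(P0), np.mean(cover)))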
def test_standalone_inference(n=2000,
                              p=100,
                              signal_fac=1.5,
                              proportion=0.7,
                              approx=True,
                              MLE=True):
    """
    Check that standalone functions reproduce same p-values
    as methods of `selectinf.randomized.lasso`
    """

    signal = np.sqrt(signal_fac * np.log(p)) / np.sqrt(n)
    X = np.random.standard_normal((n, p))

    # exponential survival times with roughly 80% observed events
    T = np.random.exponential(1, size=(n,))
    S = np.random.choice([0, 1], n, p=[0.2, 0.8])

    cox_lasso = split_lasso.coxph(X,
                                  T,
                                  S,
                                  2 * np.ones(p) * np.sqrt(n),
                                  proportion)

    signs = cox_lasso.fit()
    nonzero = signs != 0

    cox_sel = rr.glm.cox(X[:, nonzero], T, S)
    cox_full = rr.glm.cox(X, T, S)

    # refit the selected model, then pad its solution back to p coordinates
    refit_soln = cox_sel.solve(min_its=2000)
    padded_soln = np.zeros(p)
    padded_soln[nonzero] = refit_soln

    cox_full.solve(min_its=2000)
    full_hess = cox_full.hessian(padded_soln)
    selected_hess = full_hess[nonzero][:, nonzero]

    (observed_target,
     cov_target,
     cov_target_score,
     alternatives) = selected_targets(cox_lasso.loglike,
                                      None,
                                      nonzero,
                                      hessian=full_hess,
                                      dispersion=1)

    if nonzero.sum():
        testval = None

        if approx:
            approx_result = cox_lasso.approximate_grid_inference(observed_target,
                                                                 cov_target,
                                                                 cov_target_score)
            approx_pval = approx_result['pvalue']

            testval = approximate_normalizer_inference(proportion,
                                                       cox_lasso.initial_soln[nonzero],
                                                       refit_soln,
                                                       signs[nonzero],
                                                       selected_hess,
                                                       cox_lasso.feature_weights[nonzero])

            assert np.allclose(testval['pvalue'], approx_pval)
        else:
            approx_pval = np.empty(nonzero.sum()) * np.nan

        if MLE:
            MLE_result = cox_lasso.selective_MLE(observed_target,
                                                 cov_target,
                                                 cov_target_score)[0]
            MLE_pval = MLE_result['pvalue']

            testval = approximate_mle_inference(proportion,
                                                cox_lasso.initial_soln[nonzero],
                                                refit_soln,
                                                signs[nonzero],
                                                selected_hess,
                                                cox_lasso.feature_weights[nonzero])

            # only compare when the MLE p-values were actually computed
            assert np.allclose(testval['pvalue'], MLE_pval)
        else:
            MLE_pval = np.empty(nonzero.sum()) * np.nan

        # working under the null here
        beta = np.zeros(p)

        return approx_pval[beta[nonzero] == 0], MLE_pval[beta[nonzero] == 0], testval
    else:
        # match the three-value return above
        return [], [], None
def test_selected_instance(seedn,
                           n=2000,
                           p=200,
                           signal_fac=1.2,
                           s=5,
                           sigma=2,
                           rho=0.7,
                           randomizer_scale=1.,
                           full_dispersion=True):
    """
    Compare to R randomized lasso
    """

    inst, const = gaussian_instance, lasso.gaussian
    signal = np.sqrt(signal_fac * 2 * np.log(p))

    # seed once, outside the retry loop: re-seeding inside the loop would
    # regenerate the same instance forever if the lasso selected nothing
    np.random.seed(seed=seedn)

    while True:
        X, Y, beta = inst(n=n,
                          p=p,
                          signal=signal,
                          s=s,
                          equicorrelated=True,
                          rho=rho,
                          sigma=sigma,
                          random_signs=True)[:3]

        # equicorrelated design covariance for the SNR printout
        sigmaX = (1 - rho) * np.identity(p) + rho * np.ones((p, p))
        print("snr", beta.T.dot(sigmaX).dot(beta) / ((sigma ** 2.) * n))

        n, p = X.shape

        sigma_ = np.std(Y)
        W = 0.8 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_

        conv = const(X,
                     Y,
                     W,
                     ridge_term=0.,
                     randomizer_scale=randomizer_scale * sigma_)

        signs = conv.fit()
        nonzero = signs != 0
        print("dimensions", n, p, nonzero.sum())

        if nonzero.sum() > 0:
            dispersion = None
            if full_dispersion:
                dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p)

            (observed_target,
             cov_target,
             cov_target_score,
             alternatives) = selected_targets(conv.loglike,
                                              conv._W,
                                              nonzero,
                                              dispersion=dispersion)

            result = conv.selective_MLE(observed_target,
                                        cov_target,
                                        cov_target_score)[0]

            return (result['MLE'],
                    result['lower_confidence'],
                    result['upper_confidence'])
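
# Smoke-test entry point (ours): run each test once with default settings.
# test_standalone_inference additionally needs regreg's Cox model and the
# standalone helpers to be importable; comment it out if they are not.
if __name__ == "__main__":
    test_sampler()
    test_instance()
    test_selected_targets()
    test_standalone_inference()
    test_selected_instance(seedn=0)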