def test_distribution_sphere(n=15, p=10, sigma=1., nsample=2000,
                             sample_constraints=False):

    # see if we really are sampling from the correct distribution
    # by comparing to an accept-reject sampler

    con, y = _generate_constraints()[:2]
    accept_reject_sample = []
    hit_and_run_sample, W = AC.sample_from_sphere(con, y,
                                                  ndraw=25000,
                                                  burnin=10000)
    statistic = lambda x: np.fabs(x).max()
    family = discrete_family([statistic(s) for s in hit_and_run_sample], W)
    radius = np.linalg.norm(y)

    count = 0
    pvalues = []
    while True:
        U = np.random.standard_normal(n)
        U /= np.linalg.norm(U)
        U *= radius
        if con(U):
            accept_reject_sample.append(U)
            count += 1
            true_sample = np.array([statistic(s) for s in accept_reject_sample])

            if (count + 1) % 100 == 0:
                pvalues.extend([family.cdf(0, t) for t in true_sample])
                print(np.mean(pvalues), np.std(pvalues))

                if sample_constraints:
                    # draw a fresh constraint instance and MCMC sample
                    con, y = _generate_constraints()[:2]
                    hit_and_run_sample, W = AC.sample_from_sphere(con, y,
                                                                  ndraw=10000,
                                                                  burnin=10000)
                    family = discrete_family([statistic(s) for s in hit_and_run_sample], W)
                    radius = np.linalg.norm(y)
                    accept_reject_sample = []

            if count >= nsample:
                break

    # if the hit-and-run sampler is correct, these p-values are approximately uniform
    U = np.linspace(0, 1, 101)
    plt.plot(U, sm.distributions.ECDF(pvalues)(U))
    plt.plot([0, 1], [0, 1])
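# Hedged sketch (not part of the test suite, assumes only numpy): the check
# above relies on the fact that if `hit_and_run_sample` has the right law,
# then the CDF of the fitted `discrete_family`, evaluated at independent
# accept-reject draws, is approximately Uniform(0, 1).  A minimal numpy-only
# illustration of the same principle, using a weighted empirical CDF in place
# of discrete_family (discrete_family may treat mass at the atom differently):

def _weighted_ecdf_pvalues(mcmc_stats, mcmc_weights, exact_stats):
    """P-values of exact draws under the weighted MCMC empirical CDF.

    If both samplers target the same law, the returned values are
    approximately Uniform(0, 1).
    """
    import numpy as np
    mcmc_stats = np.asarray(mcmc_stats)
    w = np.asarray(mcmc_weights, dtype=float)
    w = w / w.sum()
    return np.array([w[mcmc_stats <= t].sum() for t in exact_stats])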
def test_CV(ndraw=500, sigma_known=True, burnin=100,
            s=7, rho=0.3, method=lasso_tuned, snr=5):

    # generate a null and alternative pvalue
    # from a particular model

    X, Y, beta, active, sigma = data_instance(n=500, p=100,
                                              s=s, rho=rho, snr=snr)
    if not sigma_known:
        sigma = None

    method_ = method(Y, X,
                     scale_inter=0.0001,
                     scale_valid=0.0001,
                     scale_select=0.0001)

    do_null = True
    if do_null:
        which_var = method_.active_set[s]  # the first null one

        method_.setup_inference(which_var)
        iter(method_)
        for i in range(ndraw + burnin):
            next(method_)

        Z = np.array(method_.null_sample[which_var][burnin:])
        family = discrete_family(Z, np.ones_like(Z))
        obs = method_._gaussian_obs[which_var]
        pval0 = family.cdf(0, obs)
        pval0 = 2 * min(pval0, 1 - pval0)
    else:
        pval0 = np.random.sample()

    which_var = 0
    method_.setup_inference(which_var)
    iter(method_)
    for i in range(ndraw + burnin):
        next(method_)

    family = discrete_family(method_.null_sample[which_var][burnin:],
                             np.ones(ndraw))
    obs = method_._gaussian_obs[which_var]
    pvalA = family.cdf(0, obs)
    pvalA = 2 * min(pvalA, 1 - pvalA)

    return pval0, pvalA, method_
def test_distribution_sphere(n=15, p=10, sigma=1., nsim=2000,
                             sample_constraints=False,
                             burnin=10000, ndraw=10000):

    # see if we really are sampling from the correct distribution
    # by comparing to an accept-reject sampler

    con, y = _generate_constraints()[:2]
    accept_reject_sample = []
    hit_and_run_sample, W = AC.sample_from_sphere(con, y,
                                                  ndraw=ndraw,
                                                  burnin=burnin)
    statistic = lambda x: np.fabs(x).max()
    family = discrete_family([statistic(s) for s in hit_and_run_sample], W)
    radius = np.linalg.norm(y)

    count = 0
    pvalues = []
    while True:
        U = np.random.standard_normal(n)
        U /= np.linalg.norm(U)
        U *= radius
        if con(U):
            accept_reject_sample.append(U)
            count += 1
            true_sample = np.array([statistic(s) for s in accept_reject_sample])

            if (count + 1) % int(nsim / 10) == 0:
                pvalues.extend([family.cdf(0, t) for t in true_sample])
                print(np.mean(pvalues), np.std(pvalues))

                if sample_constraints:
                    # draw a fresh constraint instance and MCMC sample
                    con, y = _generate_constraints()[:2]
                    hit_and_run_sample, W = AC.sample_from_sphere(con, y,
                                                                  ndraw=ndraw,
                                                                  burnin=burnin)
                    family = discrete_family([statistic(s) for s in hit_and_run_sample], W)
                    radius = np.linalg.norm(y)
                    accept_reject_sample = []

            if count >= nsim:
                break

    # under a correct sampler these p-values are approximately uniform
    U = np.linspace(0, 1, 101)
    plt.plot(U, sm.distributions.ECDF(pvalues)(U))
    plt.plot([0, 1], [0, 1])
def test_CV(ndraw=500, sigma_known=True, burnin=100,
            s=7, rho=0.3, method=sqrt_lasso_tuned, snr=5):

    # generate a null and alternative pvalue
    # from a particular model

    X, Y, beta, active, sigma = data_instance(s=s, rho=rho, snr=snr, n=500)
    if not sigma_known:
        sigma = None

    method_ = method(Y, X, target_R2=0.8, sigma=sigma)

    # only run the null test when screening succeeded and
    # at least one null variable was selected
    do_null = (set(range(7)).issubset(method_.active_set) and
               method_.active_set.shape[0] > 7)

    if do_null:
        which_var = method_.active_set[s]  # the first null one

        method_.setup_inference(which_var)
        iter(method_)
        for i in range(ndraw + burnin):
            next(method_)

        Z = np.array(method_.null_sample[which_var][burnin:])
        family = discrete_family(Z, np.ones_like(Z))
        obs = (method_._X_j * Y).sum()
        pval0 = family.cdf(0, obs)
        pval0 = 2 * min(pval0, 1 - pval0)
    else:
        pval0 = np.random.sample()

    which_var = 0
    method_.setup_inference(which_var)
    iter(method_)
    for i in range(ndraw + burnin):
        next(method_)

    family = discrete_family(method_.null_sample[which_var][burnin:],
                             np.ones(ndraw))
    obs = (method_._X_j * Y).sum()
    pvalA = family.cdf(0, obs)
    pvalA = 2 * min(pvalA, 1 - pvalA)

    return pval0, pvalA, method_
def _single_parameter_inference(observed_target,
                                target_cov,
                                learning_data,
                                proposal_density,
                                hypothesis=0,
                                alpha=0.1):
    '''
    Produce p-values (or pivots) and confidence intervals having
    estimated a weighting function.

    The basic object here is a 1-dimensional exponential family with
    reference density proportional to

    lambda t: scipy.stats.norm.pdf(t / np.sqrt(target_cov)) * weight_fn(t)

    Parameters
    ----------

    observed_target : float

    target_cov : np.float((1, 1))

    hypothesis : float
        Hypothesised true mean of target.

    alpha : np.float
        Level for 1 - confidence.

    Returns
    -------

    pivot : float
        Probability integral transform of the observed_target at mean
        parameter "hypothesis"

    confidence_interval : (float, float)
        (1 - alpha) * 100% confidence interval.

    '''

    T, Y = learning_data
    target_val = T[Y == 1]

    target_var = target_cov[0, 0]
    target_sd = np.sqrt(target_var)
    weight_val = (ndist.pdf((target_val - observed_target) / target_sd) /
                  proposal_density(target_val.reshape((-1, 1))))

    exp_family = discrete_family(target_val, weight_val)

    pivot = exp_family.cdf((hypothesis - observed_target) / target_var,
                           x=observed_target)
    pivot = 2 * min(pivot, 1 - pivot)

    pvalue = exp_family.cdf(-observed_target / target_var,
                            x=observed_target)
    pvalue = 2 * min(pvalue, 1 - pvalue)

    interval = exp_family.equal_tailed_interval(observed_target, alpha=alpha)
    rescaled_interval = (interval[0] * target_var,
                         interval[1] * target_var)

    return pivot, rescaled_interval, pvalue, exp_family
    # TODO: should also compute the MLE -- does discrete_family do this?
def test_MLE():
    X = np.arange(100)
    observed = 4
    pois = discrete_family(X, poisson.pmf(X, 4.5))
    MLE, var = pois.MLE(observed, tol=1.e-7, max_iter=30)[:2]
    mean_param = pois.E(MLE, lambda x: x)
    nt.assert_true(np.fabs(mean_param - observed) / observed < 1.e-4)
    nt.assert_true(np.fabs(mean_param - var * mean_param**2) < 1.e-3)
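# Hedged sketch (assumes only numpy, not part of the test suite): the MLE
# above solves the moment equation E_theta[T] = observed for the natural
# parameter theta of the discrete family.  A minimal Newton iteration for
# that equation, using E_theta[T] = dA/dtheta and Var_theta[T] = d^2A/dtheta^2:

def discrete_family_mle(support, weights, observed, tol=1e-8, max_iter=50):
    """Natural-parameter MLE for p_theta(t) proportional to w(t) exp(theta * t)."""
    import numpy as np
    support = np.asarray(support, dtype=float)
    logw = np.log(np.asarray(weights, dtype=float))
    theta = 0.
    for _ in range(max_iter):
        logp = theta * support + logw
        logp -= logp.max()                       # stabilize before exponentiating
        p = np.exp(logp)
        p /= p.sum()
        mean = (p * support).sum()
        var = (p * (support - mean) ** 2).sum()
        step = (observed - mean) / var           # Newton step on theta
        theta += step
        if abs(step) < tol:
            break
    return theta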
def cross_inference(learning_data, nuisance, direction,
                    fit_probability, nref=200, fit_args={}):

    T, Y = learning_data

    # shuffle, then split into reference, learning and density-estimation sets
    idx = np.arange(T.shape[0])
    np.random.shuffle(idx)
    Tshuf, Yshuf = T[idx], Y[idx]

    reference_T = Tshuf[:nref]
    reference_Y = Yshuf[:nref]

    nrem = T.shape[0] - nref
    learning_T = Tshuf[nref:(nref + int(nrem / 2))]
    learning_Y = Yshuf[nref:(nref + int(nrem / 2))]  # labels, not statistics

    dens_T = Tshuf[(nref + int(nrem / 2)):]

    pvalues = []
    weight_fns = fit_probability(learning_T, learning_Y, **fit_args)

    for (weight_fn, cur_nuisance, cur_direction,
         learn_T, ref_T, ref_Y, d_T) in zip(weight_fns, nuisance, direction,
                                            learning_T.T, reference_T.T,
                                            reference_Y.T, dens_T.T):

        def new_weight_fn(nuisance, direction, weight_fn, target_val):
            return weight_fn(np.multiply.outer(target_val, direction) +
                             nuisance[None, :])

        new_weight_fn = functools.partial(new_weight_fn, cur_nuisance,
                                          cur_direction, weight_fn)

        weight_val = new_weight_fn(d_T)
        exp_family = discrete_family(d_T, weight_val)
        print(ref_Y)
        pval = [exp_family.cdf(0, x=t) for t, y in zip(ref_T, ref_Y) if y == 1]
        pvalues.append(pval)

    return pvalues
def test_discreteExFam():
    X = np.arange(100)
    pois = discrete_family(X, poisson.pmf(X, 1))
    tol = 1e-5

    print(pois._leftCutFromRight(theta=0.4618311, rightCut=(5, .5)),
          pois._test2RejectsLeft(theta=2.39, observed=5, auxVar=.5))
    print(pois.interval(observed=5, alpha=.05, randomize=True, auxVar=.5))

    print(abs(1 - sum(pois.pdf(0))))
    pois.ccdf(0, 3, .4)

    print(pois.MLE(1.3))

    print(pois.Var(np.log(2), lambda x: x))
    print(pois.Cov(np.log(2), lambda x: x, lambda x: x))

    lc = pois._rightCutFromLeft(0, (0, .01))
    print((0, 0.01), pois._leftCutFromRight(0, lc))
    pois._rightCutFromLeft(-10, (0, .01))

    #[pois.test2Cutoffs(t)[1] for t in range(-10,3)]
    pois._critCovFromLeft(-10, (0, .01))
    pois._critCovFromLeft(0, (0, .01))
    pois._critCovFromRight(0, lc)

    pois._critCovFromLeft(5, (5, 1))

    pois._test2RejectsLeft(np.log(5), 5)
    pois._test2RejectsRight(np.log(5), 5)

    pois._test2RejectsLeft(np.log(20), 5)
    pois._test2RejectsRight(np.log(.1), 5)

    print(pois._inter2Upper(5, auxVar=.5))
    print(pois.interval(5, auxVar=.5))
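# Hedged sketch (assumes only numpy, not part of the test suite): the
# 1-d discrete exponential family exercised above, built by hand.  Reference
# weights w_i sit at support points t_i; tilting by natural parameter theta
# gives p_theta(t_i) proportional to w_i * exp(theta * t_i), and
# cdf(theta, x) = sum over {t_i <= x} of p_theta(t_i).  (discrete_family may
# additionally randomize the mass at the atom x; this sketch does not.)

def tilted_cdf(support, weights, theta, x):
    """CDF at x of the theta-tilted discrete exponential family."""
    import numpy as np
    support = np.asarray(support, dtype=float)
    logp = theta * support + np.log(np.asarray(weights, dtype=float))
    logp -= logp.max()            # stabilize before exponentiating
    p = np.exp(logp)
    p /= p.sum()
    return p[support <= x].sum()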
def test_one_inactive_coordinate_handcoded():
    s, n, p = 5, 200, 20

    randomizer = randomization.laplace((p,), scale=1.)
    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0.1, snr=14)

    nonzero = np.where(beta)[0]
    lam_frac = 1.

    loss = rr.glm.logistic(X, y)
    epsilon = 1.

    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
    W = np.ones(p) * lam
    W += lam * np.arange(p) / 200
    W[0] = 0
    penalty = rr.group_lasso(np.arange(p),
                             weights=dict(zip(np.arange(p), W)),
                             lagrange=1.)
    print(lam)

    # our randomization

    M_est1 = glm_group_lasso(loss, epsilon, penalty, randomizer)
    M_est1.solve()
    bootstrap_score1 = M_est1.setup_sampler()

    active = M_est1.selection_variable['variables']
    if set(nonzero).issubset(np.nonzero(active)[0]):
        boot_target, target_observed = pairs_bootstrap_glm(loss, active)

        # target are all true null coefficients selected

        sampler = lambda: np.random.choice(n, size=(n,), replace=True)
        target_cov, cov1 = bootstrap_cov(sampler, boot_target,
                                         cross_terms=(bootstrap_score1,))

        # have checked that covariance up to here agrees with other
        # test_glm_langevin example

        active_set = np.nonzero(active)[0]
        inactive_selected = I = [i for i in np.arange(active_set.shape[0])
                                 if active_set[i] not in nonzero]

        # is it enough only to bootstrap the inactive ones?
        # seems so...

        if not I:
            return None

        # take the first inactive one
        I = I[:1]
        A1, b1 = M_est1.linear_decomposition(cov1[I],
                                             target_cov[I][:, I],
                                             target_observed[I])
        print(I, 'I', target_observed[I])

        target_inv_cov = np.linalg.inv(target_cov[I][:, I])

        initial_state = np.hstack([target_observed[I],
                                   M_est1.observed_opt_state])

        ntarget = len(I)
        target_slice = slice(0, ntarget)
        opt_slice1 = slice(ntarget, p + ntarget)

        def target_gradient(state):
            # with many samplers, we will add up the `target_slice` component
            # of many target_grads and only once do the Gaussian addition
            # of full_grad
            target = state[target_slice]
            opt_state1 = state[opt_slice1]
            target_grad1 = M_est1.randomization_gradient(target, (A1, b1), opt_state1)

            full_grad = np.zeros_like(state)
            full_grad[opt_slice1] = -target_grad1[1]
            full_grad[target_slice] -= target_grad1[0]
            full_grad[target_slice] -= target_inv_cov.dot(target)

            return full_grad

        def target_projection(state):
            opt_state1 = state[opt_slice1]
            state[opt_slice1] = M_est1.projection(opt_state1)
            return state

        target_langevin = projected_langevin(initial_state,
                                             target_gradient,
                                             target_projection,
                                             1. / p)

        Langevin_steps = 10000
        burning = 2000
        samples = []
        for i in range(Langevin_steps + burning):
            next(target_langevin)
            if i > burning:
                samples.append(target_langevin.state[target_slice].copy())

        test_stat = lambda x: x
        observed = test_stat(target_observed[I])
        sample_test_stat = np.array([test_stat(x) for x in samples])

        family = discrete_family(sample_test_stat,
                                 np.ones_like(sample_test_stat))
        pval = family.ccdf(0, observed)
        pval = 2 * min(pval, 1 - pval)

        _i = I[0]
        naive_Z = target_observed[_i] / np.sqrt(target_cov[_i, _i])
        naive_pval = ndist.sf(np.fabs(naive_Z))
        naive_pval = 2 * min(naive_pval, 1 - naive_pval)
        print('naive Z', naive_Z, naive_pval)
        return pval, naive_pval, False
def test_simple_problem(n=100, randomization_dist="logistic",
                        threshold=1, weights="neutral",
                        Langevin_steps=10000, burning=0):
    step_size = 1. / n
    y = np.random.standard_normal(n)
    obs = np.sqrt(n) * np.mean(y)

    if randomization_dist == "logistic":
        omega = np.random.logistic(loc=0, scale=1, size=1)

    if (obs + omega < threshold):
        return -1

    #initial_state = np.ones(n)
    initial_state = np.zeros(n)

    y_cs = (y - np.mean(y)) / np.sqrt(n)

    def full_projection(state):
        return state

    def full_gradient(state, n=n, y_cs=y_cs):
        gradient = np.zeros(n)
        if weights == "normal":
            gradient -= state
        if weights == "gumbel":
            gumbel_beta = np.sqrt(6) / (1.14 * np.pi)
            euler = 0.57721
            gumbel_mu = -gumbel_beta * euler
            gumbel_sigma = 1. / 1.14
            gradient -= (1. - np.exp(-(state * gumbel_sigma - gumbel_mu) /
                                     gumbel_beta)) * gumbel_sigma / gumbel_beta
        if weights == "logistic":
            gradient = np.divide(np.exp(-state) - 1, np.exp(-state) + 1)
        if weights == "neutral":
            gradient = -np.inner(state, y_cs) * y_cs

        omega = -np.inner(y_cs, state) + threshold
        if randomization_dist == "logistic":
            randomization_derivative = -1. / (1 + np.exp(-omega))
        gradient -= y_cs * randomization_derivative

        return gradient

    sampler = projected_langevin(initial_state.copy(),
                                 full_gradient,
                                 full_projection,
                                 step_size)

    samples = []
    for i in range(Langevin_steps):
        next(sampler)
        if i > burning:
            samples.append(sampler.state.copy())

    alphas = np.array(samples)
    pop = [np.inner(y_cs, alphas[i, :]) for i in range(alphas.shape[0])]

    fam = discrete_family(pop, np.ones_like(pop))
    pval = fam.cdf(0, obs)
    pval = 2 * min(pval, 1 - pval)
    print("observed: ", obs, "p value: ", pval)
    return pval
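# Hedged sketch (assumes only numpy, not part of the test suite): the
# `projected_langevin` objects used throughout these tests iterate the
# projected unadjusted Langevin update
#     x <- proj(x + eta * grad_log_density(x) + sqrt(2 * eta) * N(0, I)).
# A minimal stand-in step for readers without the package:

def projected_langevin_step(state, grad_log_density, projection, step_size):
    """One projected Langevin update; `projection` enforces the constraints."""
    import numpy as np
    noise = np.sqrt(2 * step_size) * np.random.standard_normal(state.shape)
    proposal = state + step_size * grad_log_density(state) + noise
    return projection(proposal)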
def _inference(observed_target,
               target_cov,
               weight_fn,  # our fitted function
               success_params=(1, 1),
               hypothesis=0,
               alpha=0.1):
    '''
    Produce p-values (or pivots) and confidence intervals having
    estimated a weighting function.

    The basic object here is a 1-dimensional exponential family with
    reference density proportional to

    lambda t: scipy.stats.norm.pdf(t / np.sqrt(target_cov)) * weight_fn(t)

    Parameters
    ----------

    observed_target : float

    target_cov : np.float((1, 1))

    hypothesis : float
        Hypothesised true mean of target.

    alpha : np.float
        Level for 1 - confidence.

    Returns
    -------

    pivot : float
        Probability integral transform of the observed_target at mean
        parameter "hypothesis"

    confidence_interval : (float, float)
        (1 - alpha) * 100% confidence interval.

    '''

    k, m = success_params  # need at least k of m successes

    target_sd = np.sqrt(target_cov[0, 0])

    target_val = np.linspace(-20 * target_sd, 20 * target_sd,
                             5001) + observed_target

    if (k, m) != (1, 1):
        weight_val = np.array([binom(m, p).sf(k - 1)
                               for p in weight_fn(target_val)])
    else:
        weight_val = np.squeeze(weight_fn(target_val))

    weight_val *= ndist.pdf((target_val - observed_target) / target_sd)
    exp_family = discrete_family(target_val, weight_val)

    pivot = exp_family.cdf((hypothesis - observed_target) / target_cov[0, 0],
                           x=observed_target)
    pivot = 2 * min(pivot, 1 - pivot)

    pvalue = exp_family.cdf(-observed_target / target_cov[0, 0],
                            x=observed_target)
    pvalue = 2 * min(pvalue, 1 - pvalue)

    interval = exp_family.equal_tailed_interval(observed_target,
                                                alpha=alpha)
    rescaled_interval = (interval[0] * target_cov[0, 0] + observed_target,
                         interval[1] * target_cov[0, 0] + observed_target)

    return pivot, rescaled_interval, pvalue, weight_fn, exp_family
    # TODO: should also compute the MLE -- does discrete_family do this?
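# Hedged note (assumes only numpy/scipy): with success_params = (k, m), the
# selection probability at a candidate target value t is taken above to be
# P(at least k of m independent Bernoulli(weight_fn(t)) trials succeed),
# i.e. binom(m, p).sf(k - 1).  A quick Monte Carlo check of that identity:

def _check_k_of_m(k=2, m=5, prob=0.3, nsim=100000):
    import numpy as np
    from scipy.stats import binom
    analytic = binom(m, prob).sf(k - 1)            # P(#successes >= k)
    draws = np.random.binomial(m, prob, size=nsim)
    return analytic, (draws >= k).mean()           # should agree closely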
def test_kfstep(k=4, s=3, n=100, p=10, Langevin_steps=10000, burning=2000):

    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, random_signs=True,
                                                   s=s, sigma=1., rho=0,
                                                   signal=10)
    epsilon = 0.
    randomization = laplace(loc=0, scale=1.)

    j_seq = np.empty(k, dtype=int)
    s_seq = np.empty(k)

    left = np.ones(p, dtype=bool)
    obs = 0

    initial_state = np.zeros(n + np.sum([i for i in range(p - k + 1, p + 1)]))
    initial_state[:n] = y.copy()

    # placeholders, overwritten below at each of the k steps
    mat = [np.zeros((n, ncol)) for ncol in range(p, p - k, -1)]

    curr = n

    keep = np.zeros(p, dtype=bool)

    for i in range(k):
        X_left = X[:, left]
        X_selected = X[:, ~left]
        if np.sum(left) < p:
            P_perp = np.identity(n) - X_selected.dot(np.linalg.pinv(X_selected))
            mat[i] = P_perp.dot(X_left)
        else:
            mat[i] = X

        mat_complete = np.zeros((n, p))
        mat_complete[:, left] = mat[i]

        T = np.dot(mat[i].T, y)
        T_complete = np.dot(mat_complete.T, y)

        obs = np.max(np.abs(T))
        keep = np.copy(~left)

        random_Z = randomization.rvs(T.shape[0])
        T_random = T + random_Z
        initial_state[curr:(curr + p - i)] = T_random  # initializing subgradients
        curr = curr + p - i

        j_seq[i] = np.argmax(np.abs(T_random))
        s_seq[i] = np.sign(T_random[j_seq[i]])

        T_complete[left] += random_Z
        left[np.argmax(np.abs(T_complete))] = False

    # conditioning
    linear_part = X[:, keep].T
    P = np.dot(linear_part.T, np.linalg.pinv(linear_part).T)
    I = np.identity(linear_part.shape[1])
    R = I - P

    def full_projection(state, n=n, p=p, k=k):
        new_state = np.empty(state.shape)
        new_state[:n] = state[:n]
        curr = n
        for i in range(k):
            projection = projection_cone(p - i, j_seq[i], s_seq[i])
            new_state[curr:(curr + p - i)] = projection(state[curr:(curr + p - i)])
            curr = curr + p - i
        return new_state

    def full_gradient(state, n=n, p=p, k=k, X=X, mat=mat):
        data = state[:n]

        grad = np.empty(n + np.sum([i for i in range(p - k + 1, p + 1)]))
        grad[:n] = -data

        curr = n
        for i in range(k):
            subgrad = state[curr:(curr + p - i)]
            sign_vec = np.sign(-mat[i].T.dot(data) + subgrad)
            grad[curr:(curr + p - i)] = -sign_vec
            curr = curr + p - i
            grad[:n] += mat[i].dot(sign_vec)

        return grad

    sampler = projected_langevin(initial_state, full_gradient,
                                 full_projection, 1. / p)

    samples = []
    for i in range(Langevin_steps):
        # advance the chain every iteration, then re-impose the
        # conditioning on P y; keep only post-burn-in states
        old_state = sampler.state.copy()
        old_data = old_state[:n]
        next(sampler)
        new_state = sampler.state.copy()
        new_data = new_state[:n]
        new_data = np.dot(P, old_data) + np.dot(R, new_data)
        sampler.state[:n] = new_data
        if i > burning:
            samples.append(sampler.state.copy())

    samples = np.array(samples)
    Z = samples[:, :n]

    pop = np.abs(mat[k - 1].T.dot(Z.T)).max(0)
    fam = discrete_family(pop, np.ones_like(pop))
    pval = fam.cdf(0, obs)
    pval = 2 * min(pval, 1 - pval)

    print('pvalue:', pval)
    return pval
def test_fstep(s=0, n=100, p=10, Langevin_steps=10000, burning=2000,
               condition_on_sign=True):

    X, y, _, nonzero, sigma = instance(n=n, p=p, random_signs=True,
                                       s=s, sigma=1., rho=0)
    epsilon = 0.
    randomization = laplace(loc=0, scale=1.)

    random_Z = randomization.rvs(p)
    T = np.dot(X.T, y)
    T_random = T + random_Z
    T_abs = np.abs(T_random)
    j_star = np.argmax(T_abs)
    s_star = np.sign(T_random[j_star])

    # this is the subgradient part of the projection
    if condition_on_sign:
        projection = projection_cone(p, j_star, s_star)
    else:
        projection = projection_cone_nosign(p, j_star)

    def full_projection(state, n=n, p=p):
        """
        State is (y, u) -- first n coordinates are y, last p are u.
        """
        new_state = np.empty(state.shape)
        new_state[:n] = state[:n]
        new_state[n:] = projection(state[n:])
        return new_state

    obs = np.max(np.abs(T))

    eta_star = np.zeros(p)
    eta_star[j_star] = s_star

    def full_gradient(state, n=n, p=p, X=X):
        data = state[:n]
        subgrad = state[n:]
        sign_vec = np.sign(-X.T.dot(data) + subgrad)

        grad = np.empty(state.shape)
        grad[n:] = -sign_vec
        grad[:n] = -(data - X.dot(sign_vec))
        return grad

    state = np.zeros(n + p)
    state[:n] = y
    state[n:] = T_random

    sampler = projected_langevin(state, full_gradient,
                                 full_projection, 1. / p)

    samples = []
    for i in range(Langevin_steps):
        next(sampler)          # advance the chain every iteration ...
        if i > burning:        # ... but keep only post-burn-in states
            samples.append(sampler.state.copy())

    samples = np.array(samples)
    Z = samples[:, :n]

    pop = np.abs(X.T.dot(Z.T)).max(0)
    fam = discrete_family(pop, np.ones_like(pop))
    pval = fam.cdf(0, obs)
    pval = 2 * min(pval, 1 - pval)

    print('pvalue:', pval)
    return pval
def test_fstep(s=0, n=50, p=10, weights="gumbel",
               randomization_dist="logistic",
               Langevin_steps=10000, burning=1000):

    X, y, _, nonzero, sigma = instance(n=n, p=p, random_signs=True,
                                       s=s, sigma=1., rho=0)
    epsilon = 0.

    if randomization_dist == "laplace":
        randomization = laplace(loc=0, scale=1.)
        random_Z = randomization.rvs(p)
    if randomization_dist == "logistic":
        random_Z = np.random.logistic(loc=0, scale=1, size=p)

    T = np.dot(X.T, y)
    T_random = T + random_Z
    T_abs = np.abs(T_random)
    j_star = np.argmax(T_abs)
    s_star = np.sign(T_random[j_star])

    # this is the subgradient part of the projection
    projection = projection_cone(p, j_star, s_star)

    def full_projection(state, n=n, p=p):
        """
        State is (y, u) -- first n coordinates are y, last p are u.
        """
        new_state = np.empty(state.shape)
        new_state[:n] = state[:n]
        new_state[n:] = projection(state[n:])
        return new_state

    obs = np.max(np.abs(T))

    eta_star = np.zeros(p)
    eta_star[j_star] = s_star

    def full_gradient(state, n=n, p=p, X=X, y=y):
        alpha = state[:n]
        subgrad = state[n:]

        mat = np.dot(X.T, np.diag(y))
        omega = -mat.dot(alpha) + subgrad

        if randomization_dist == "laplace":
            randomization_derivative = np.sign(omega)
        if randomization_dist == "logistic":
            randomization_derivative = -(np.exp(-omega) - 1) / (np.exp(-omega) + 1)
        if randomization_dist == "normal":
            randomization_derivative = omega

        grad = np.empty(state.shape)
        grad[:n] = np.dot(mat.T, randomization_derivative)

        if weights == "normal":
            grad[:n] -= alpha
        if weights == "gumbel":
            gumbel_beta = np.sqrt(6) / (1.14 * np.pi)
            euler = 0.57721
            gumbel_mu = -gumbel_beta * euler
            gumbel_sigma = 1. / 1.14
            grad[:n] -= (1. - np.exp(-(alpha * gumbel_sigma - gumbel_mu) /
                                     gumbel_beta)) * gumbel_sigma / gumbel_beta

        grad[n:] = -randomization_derivative

        return grad

    state = np.zeros(n + p)
    state[:n] = np.zeros(n)
    state[n:] = T_random

    sampler = projected_langevin(state, full_gradient,
                                 full_projection, 1. / p)

    samples = []
    for i in range(Langevin_steps):
        next(sampler)
        if i > burning:
            samples.append(sampler.state.copy())

    samples = np.array(samples)
    Z = samples[:, :n]
    print(Z.shape)

    mat = np.dot(X.T, np.diag(y))
    pop = np.abs(np.dot(mat, Z.T)).max(0)
    fam = discrete_family(pop, np.ones_like(pop))
    pval = fam.cdf(0, obs)
    pval = 2 * min(pval, 1 - pval)

    print('pvalue:', pval)
    return pval
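# Hedged note (assumes only numpy): the `randomization_derivative` branches
# above are, up to the sign convention in how omega enters the gradient, the
# score functions d/dw log f(w) of the randomization densities:
#   Laplace(scale b):   -sign(w) / b
#   logistic(scale 1):  (exp(-w) - 1) / (exp(-w) + 1)  ==  -tanh(w / 2)
#   normal(scale s):    -w / s**2
# A quick numerical check of the logistic identity:

def _check_logistic_score():
    import numpy as np
    w = np.linspace(-3, 3, 7)
    return np.allclose((np.exp(-w) - 1) / (np.exp(-w) + 1), -np.tanh(w / 2))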
def pval(vec_state, full_projection, X, y,
         obs_residuals, signs, lam, epsilon,
         nonzero, active):
    """Compute null and alternative p-values for each active variable."""

    n, p = X.shape

    y0 = y.copy()

    null = []
    alt = []

    X_E = X[:, active]
    ndata = y.shape[0]
    inactive = ~active
    nalpha = n

    active_set = np.where(active)[0]
    print("true nonzero", nonzero, "active set", active_set)

    if set(nonzero).issubset(active_set):

        for j, idx in enumerate(active_set):
            eta = X[:, idx]
            keep = np.copy(active)
            keep[idx] = False

            linear_part = X[:, keep].T
            P = np.dot(linear_part.T, np.linalg.pinv(linear_part).T)
            I = np.identity(linear_part.shape[1])
            R = I - P

            fixed_part = np.dot(X.T, np.dot(P, y))
            hessian = np.dot(X.T, X)
            B = hessian + epsilon * np.identity(p)
            A = B[:, active]
            matXTR = X.T.dot(R)

            def full_gradient(vec_state, fixed_part=fixed_part, R=R,
                              obs_residuals=obs_residuals, signs=signs,
                              X=X, lam=lam, epsilon=epsilon, data0=y,
                              hessian=hessian, A=A, matXTR=matXTR,
                              nalpha=nalpha, active=active,
                              inactive=inactive):

                nactive = np.sum(active)
                ninactive = np.sum(inactive)

                alpha = vec_state[:nalpha]
                betaE = vec_state[nalpha:(nalpha + nactive)]
                cube = vec_state[(nalpha + nactive):]

                p = X.shape[1]
                beta_full = np.zeros(p)
                beta_full[active] = betaE
                subgradient = np.zeros(p)
                subgradient[inactive] = lam * cube
                subgradient[active] = lam * signs

                opt_vec = epsilon * beta_full + subgradient

                weighted_residuals = np.diag(obs_residuals).dot(alpha)
                omega = (-fixed_part - np.dot(matXTR, weighted_residuals)
                         + np.dot(hessian, beta_full) + opt_vec)
                sign_vec = np.sign(omega)

                mat = np.dot(matXTR, np.diag(obs_residuals))

                _gradient = np.zeros(nalpha + nactive + ninactive)
                _gradient[:nalpha] = -np.ones(nalpha) + np.dot(mat.T, sign_vec)
                _gradient[nalpha:(nalpha + nactive)] = -np.dot(A.T, sign_vec)
                _gradient[(nalpha + nactive):] = -lam * sign_vec[inactive]

                return _gradient

            sampler = projected_langevin(vec_state.copy(),
                                         full_gradient,
                                         full_projection,
                                         1. / p)
            samples = []

            for _ in range(5000):
                next(sampler)
                samples.append(sampler.state.copy())

            samples = np.array(samples)
            alpha_samples = samples[:, :n]

            residuals_samples = [np.diag(obs_residuals).dot(alpha_samples[i, :])
                                 for i in range(len(samples))]

            pop = [np.inner(eta, np.dot(P, y0) + np.dot(R, z))
                   for z in residuals_samples]
            obs = np.inner(eta, y0)

            fam = discrete_family(pop, np.ones_like(pop))
            pval = fam.cdf(0, obs)
            pval = 2 * min(pval, 1 - pval)
            print("observed: ", obs, "p value: ", pval)

            if idx in nonzero:
                alt.append(pval)
            else:
                null.append(pval)

    return null, alt
def pval(vec_state, full_gradient, full_projection,
         X, y, obs_residuals, nonzero, active):
    """Compute a global test p-value over the active set."""

    n, p = X.shape

    y0 = y.copy()

    null = []
    alt = []

    X_E = X[:, active]
    ndata = y.shape[0]

    active_set = np.where(active)[0]
    print("true nonzero", nonzero, "active set", active_set)

    if set(nonzero).issubset(active_set):

        sampler = projected_langevin(vec_state.copy(),
                                     full_gradient,
                                     full_projection,
                                     1. / p)
        samples = []

        for _ in range(6000):
            next(sampler)
            samples.append(sampler.state.copy())

        samples = np.array(samples)
        alpha_samples = samples[:, :n]

        data_samples = [np.dot(X[:, active].T,
                               np.diag(obs_residuals).dot(alpha_samples[i, :]))
                        for i in range(len(samples))]

        pop = [np.linalg.norm(z) for z in data_samples]
        obs = np.linalg.norm(np.dot(X[:, active].T, y0))

        fam = discrete_family(pop, np.ones_like(pop))
        pval = fam.cdf(0, obs)
        pval = 2 * min(pval, 1 - pval)
        print("observed: ", obs, "p value: ", pval)

        null.append(pval)

    return null, alt
def test_overall_null_two_queries():
    s, n, p = 5, 200, 20

    randomizer = randomization.laplace((p,), scale=0.5)
    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0.1, snr=14)

    nonzero = np.where(beta)[0]
    lam_frac = 1.

    loss = rr.glm.logistic(X, y)
    epsilon = 1. / np.sqrt(n)

    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
    W = np.ones(p) * lam
    W[0] = 0  # use at least some unpenalized
    penalty = rr.group_lasso(np.arange(p),
                             weights=dict(zip(np.arange(p), W)),
                             lagrange=1.)

    # first randomization

    M_est1 = glm_group_lasso(loss, epsilon, penalty, randomizer)
    M_est1.solve()
    bootstrap_score1 = M_est1.setup_sampler(scaling=2.)

    # second randomization

    M_est2 = glm_group_lasso(loss, epsilon, penalty, randomizer)
    M_est2.solve()
    bootstrap_score2 = M_est2.setup_sampler(scaling=2.)

    # we take target to be union of two active sets

    active = (M_est1.selection_variable['variables'] +
              M_est2.selection_variable['variables'])

    if set(nonzero).issubset(np.nonzero(active)[0]):
        boot_target, target_observed = pairs_bootstrap_glm(loss, active)

        # target are all true null coefficients selected

        sampler = lambda: np.random.choice(n, size=(n,), replace=True)
        target_cov, cov1, cov2 = bootstrap_cov(sampler, boot_target,
                                               cross_terms=(bootstrap_score1,
                                                            bootstrap_score2))

        active_set = np.nonzero(active)[0]
        inactive_selected = I = [i for i in np.arange(active_set.shape[0])
                                 if active_set[i] not in nonzero]

        # is it enough only to bootstrap the inactive ones?
        # seems so...

        if not I:
            return None

        A1, b1 = M_est1.linear_decomposition(cov1[I],
                                             target_cov[I][:, I],
                                             target_observed[I])
        A2, b2 = M_est2.linear_decomposition(cov2[I],
                                             target_cov[I][:, I],
                                             target_observed[I])

        target_inv_cov = np.linalg.inv(target_cov[I][:, I])

        initial_state = np.hstack([target_observed[I],
                                   M_est1.observed_opt_state,
                                   M_est2.observed_opt_state])

        ntarget = len(I)
        target_slice = slice(0, ntarget)
        opt_slice1 = slice(ntarget, p + ntarget)
        opt_slice2 = slice(p + ntarget, 2 * p + ntarget)

        def target_gradient(state):
            # with many samplers, we will add up the `target_slice` component
            # of many target_grads and only once do the Gaussian addition
            # of full_grad
            target = state[target_slice]
            opt_state1 = state[opt_slice1]
            opt_state2 = state[opt_slice2]
            target_grad1 = M_est1.randomization_gradient(target, (A1, b1), opt_state1)
            target_grad2 = M_est2.randomization_gradient(target, (A2, b2), opt_state2)

            full_grad = np.zeros_like(state)
            full_grad[opt_slice1] = -target_grad1[1]
            full_grad[opt_slice2] = -target_grad2[1]
            full_grad[target_slice] -= target_grad1[0] + target_grad2[0]
            full_grad[target_slice] -= target_inv_cov.dot(target)

            return full_grad

        def target_projection(state):
            opt_state1 = state[opt_slice1]
            state[opt_slice1] = M_est1.projection(opt_state1)
            opt_state2 = state[opt_slice2]
            state[opt_slice2] = M_est2.projection(opt_state2)
            return state

        target_langevin = projected_langevin(initial_state,
                                             target_gradient,
                                             target_projection,
                                             .5 / (2 * p + 1))

        Langevin_steps = 10000
        burning = 2000
        samples = []
        for i in range(Langevin_steps):
            next(target_langevin)
            if i >= burning:
                samples.append(target_langevin.state[target_slice].copy())

        test_stat = lambda x: np.linalg.norm(x)
        observed = test_stat(target_observed[I])
        sample_test_stat = np.array([test_stat(x) for x in samples])

        family = discrete_family(sample_test_stat,
                                 np.ones_like(sample_test_stat))
        pval = family.ccdf(0, observed)
        return pval, False
def pval(sampler, loss_args, linear_part, data, nonzero):
    """
    Compute the null and alternative p-values for a regularized problem.

    Parameters
    ----------
    loss : specific loss, e.g. gaussian_Xfixed, logistic_Xrandom
    penalty : regularization, e.g. selective_l1norm
    randomization : the distribution of the randomized noise
    linear_part, data : (C, y)
        To test for the jth parameter, we condition on
        C_{\backslash j} y = d_{\backslash j}.
    nonzero : the true underlying nonzero pattern
    sigma : noise level of the data; if None, an estimate of the
        covariance is needed

    Returns
    -------
    null, alt : null and alternative p-values.
    """

    n, p = sampler.loss.X.shape
    data0 = data.copy()

    active = sampler.penalty.active_set
    if linear_part is None:
        off = ~np.identity(p, dtype=bool)
        E = np.zeros((p, p), dtype=bool)
        E[off] = active
        E = np.logical_or(E.T, E)
        active_set = np.where(E[off])[0]
    else:
        active_set = np.where(active)[0]

    print("true nonzero", nonzero, "nonzero coefs", active_set)

    null = []
    alt = []

    if set(nonzero).issubset(active_set):
        for j, idx in enumerate(active_set):
            if j not in nonzero:
                if linear_part is not None:
                    eta = linear_part[:, idx]
                    keep = np.copy(active)
                    keep[idx] = False
                    L = linear_part[:, keep]
                    loss_args['linear_part'] = L.T
                    loss_args['value'] = np.dot(L.T, data)

                    sampler.setup_sampling(data, loss_args=loss_args)
                    samples = sampler.sampling(ndraw=5000, burnin=1000)
                    pop = [np.dot(eta, z) for z, _ in samples]
                    obs = np.dot(eta, data0)
                else:
                    row, col = nonzero_index(idx, p)
                    print(row, col)
                    eta = data0[:, row]
                    sampler.setup_sampling(data, loss_args=loss_args)
                    samples = sampler.sampling(ndraw=5000, burnin=1000)
                    pop = [np.dot(eta, z[:, col]) for z, _ in samples]
                    obs = np.dot(eta, data0[:, col])

                fam = discrete_family(pop, np.ones_like(pop))
                pval = fam.cdf(0, obs)
                pval = 2 * min(pval, 1 - pval)
                print("observed: ", obs, "p value: ", pval)
                if pval < 0.0001:
                    print(obs, pval, np.percentile(pop, [0.2, 0.4, 0.6, 0.8, 1.0]))

                if idx in nonzero:
                    alt.append(pval)
                else:
                    null.append(pval)

    print('opt_vars', sampler.penalty.accept_l1_part, sampler.penalty.total_l1_part)
    print('data', sampler.loss.accept_data, sampler.loss.total_data)

    return null, alt
def simulate(n=100):

    # description of statistical problem

    truth = np.array([4., -4]) / np.sqrt(n)

    data = np.random.standard_normal((n, 2)) + np.multiply.outer(np.ones(n), truth)

    def sufficient_stat(data):
        return np.mean(data, 0)

    S = sufficient_stat(data)

    # randomization mechanism

    class normal_sampler(object):

        def __init__(self, center, covariance):
            (self.center,
             self.covariance) = (np.asarray(center),
                                 np.asarray(covariance))
            self.cholT = np.linalg.cholesky(self.covariance).T
            self.shape = self.center.shape

        def __call__(self, scale=1., size=None):
            if type(size) == type(1):
                size = (size,)
            size = size or (1,)
            if self.shape == ():
                _shape = (1,)
            else:
                _shape = self.shape
            return scale * np.squeeze(np.random.standard_normal(size + _shape).dot(self.cholT)) + self.center

        def __copy__(self):
            return normal_sampler(self.center.copy(),
                                  self.covariance.copy())

    observed_sampler = normal_sampler(S, 1 / n * np.identity(2))

    def algo_constructor():

        def myalgo(sampler):
            min_success = 1
            ntries = 3
            success = 0
            for _ in range(ntries):
                noisyS = sampler(scale=0.5)
                success += noisyS.sum() > 0.2 / np.sqrt(n)
            return success >= min_success

        return myalgo

    # run selection algorithm

    algo_instance = algo_constructor()
    observed_outcome = algo_instance(observed_sampler)

    # find the target, based on the observed outcome

    def compute_target(observed_outcome, data):
        if observed_outcome:  # target is truth[0]
            observed_target, target_cov, cross_cov = (sufficient_stat(data)[0],
                                                      1 / n * np.identity(1),
                                                      np.array([1., 0.]).reshape((2, 1)) / n)
        else:
            observed_target, target_cov, cross_cov = (sufficient_stat(data)[1],
                                                      1 / n * np.identity(1),
                                                      np.array([0., 1.]).reshape((2, 1)) / n)
        return observed_target, target_cov, cross_cov

    observed_target, target_cov, cross_cov = compute_target(observed_outcome, data)
    direction = cross_cov.dot(np.linalg.inv(target_cov))

    if observed_outcome:
        true_target = truth[0]  # natural parameter
    else:
        true_target = truth[1]  # natural parameter

    def learning_proposal(n=100):
        scale = np.random.choice([0.5, 1, 1.5, 2], 1)
        return np.random.standard_normal() * scale / np.sqrt(n) + observed_target

    def logit_fit(T, Y):
        rpy2.robjects.numpy2ri.activate()
        rpy.r.assign('T', T)
        rpy.r.assign('Y', Y.astype(int))
        rpy.r('''
    Y = as.numeric(Y)
    T = as.numeric(T)
    M = glm(Y ~ ns(T, 10), family=binomial(link='logit'))
    fitfn = function(t) { predict(M, newdata=data.frame(T=t), type='link') }
    ''')
        rpy2.robjects.numpy2ri.deactivate()

        def fitfn(t):
            rpy2.robjects.numpy2ri.activate()
            fitfn_r = rpy.r('fitfn')
            val = fitfn_r(t)
            rpy2.robjects.numpy2ri.deactivate()
            return np.exp(val) / (1 + np.exp(val))

        return fitfn

    def probit_fit(T, Y):
        rpy2.robjects.numpy2ri.activate()
        rpy.r.assign('T', T)
        rpy.r.assign('Y', Y.astype(int))
        rpy.r('''
    Y = as.numeric(Y)
    T = as.numeric(T)
    M = glm(Y ~ ns(T, 10), family=binomial(link='probit'))
    fitfn = function(t) { predict(M, newdata=data.frame(T=t), type='link') }
    ''')
        rpy2.robjects.numpy2ri.deactivate()

        def fitfn(t):
            rpy2.robjects.numpy2ri.activate()
            fitfn_r = rpy.r('fitfn')
            val = fitfn_r(t)
            rpy2.robjects.numpy2ri.deactivate()
            return ndist.cdf(val)

        return fitfn

    def learn_weights(algorithm, observed_sampler, learning_proposal,
                      fit_probability, B=15000):

        S = selection_stat = observed_sampler.center
        new_sampler = copy(observed_sampler)

        learning_sample = []
        for _ in range(B):
            # a guess at an informative distribution for learning what we want
            T = learning_proposal()
            new_sampler = copy(observed_sampler)
            new_sampler.center = S + direction.dot(T - observed_target)
            Y = algorithm(new_sampler) == observed_outcome
            learning_sample.append((T[0], Y))

        learning_sample = np.array(learning_sample)
        T, Y = learning_sample.T
        conditional_law = fit_probability(T, Y)
        return conditional_law

    weight_fn = learn_weights(algo_instance, observed_sampler,
                              learning_proposal, probit_fit)

    # let's form the pivot

    target_val = np.linspace(-1, 1, 1001)
    weight_val = weight_fn(target_val)
    weight_val *= ndist.pdf(target_val / np.sqrt(target_cov[0, 0]))

    if observed_outcome:
        plt.plot(target_val, np.log(weight_val), 'k')
    else:
        plt.plot(target_val, np.log(weight_val), 'r')

    # for p == 1 targets this is what we do -- have some code
    # for multidimensional too

    print('(true, observed):', true_target, observed_target)
    exp_family = discrete_family(target_val, weight_val)

    pivot = exp_family.cdf(true_target / target_cov[0, 0], x=observed_target)
    interval = exp_family.equal_tailed_interval(observed_target, alpha=0.1)

    return (pivot,
            (interval[0] * target_cov[0, 0] < true_target) *
            (interval[1] * target_cov[0, 0] > true_target),
            (interval[1] - interval[0]) * target_cov[0, 0])
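# Hedged sketch: `logit_fit`/`probit_fit` above call out to R's glm with a
# spline basis via rpy2.  For readers without rpy2, a minimal numpy-only
# stand-in that fits P(Y = 1 | T) by logistic regression on a polynomial
# basis (the basis and the plain gradient ascent are simplifying assumptions,
# not the method used above):

def poly_logit_fit(T, Y, degree=3, n_iter=5000, lr=0.5):
    """Fit P(Y = 1 | T) by logistic regression on a polynomial basis."""
    import numpy as np
    powers = np.arange(degree + 1)
    scale = np.abs(np.power.outer(T, powers)).max(0)   # column scaling
    basis = np.power.outer(T, powers) / scale
    coef = np.zeros(degree + 1)
    for _ in range(n_iter):
        prob = 1. / (1. + np.exp(-basis.dot(coef)))
        coef += lr * basis.T.dot(Y - prob) / len(Y)    # gradient ascent on log-likelihood
    def fitfn(t):
        b = np.power.outer(np.atleast_1d(t), powers) / scale
        return 1. / (1. + np.exp(-b.dot(coef)))
    return fitfn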
def pval(vec_state, full_projection, X, obs_residuals,
         beta_unpenalized, full_null, signs, lam, epsilon,
         nonzero, active, Sigma,
         weights, randomization_dist, randomization_scale,
         Langevin_steps, step_size, burning,
         X_scaled):
    """Compute null and alternative p-values, one active coordinate at a time."""

    n, p = X.shape

    null = []
    alt = []

    X_E = X[:, active]
    inactive = ~active
    nalpha = n

    nactive = np.sum(active)
    ninactive = np.sum(inactive)

    active_set = np.where(active)[0]
    print("true nonzero", nonzero, "active set", active_set)

    XEpinv = np.linalg.pinv(X[:, active])
    hessian = np.dot(X.T, X)
    hessian_restricted = hessian[:, active]

    mat = XEpinv.dot(np.diag(obs_residuals))

    if set(nonzero).issubset(active_set):

        for j, idx in enumerate(active_set):

            eta = np.zeros(nactive)
            eta[j] = 1

            sigma_eta_sq = Sigma[j, j]

            linear_part = np.identity(nactive) - (np.outer(np.dot(Sigma, eta), eta) / sigma_eta_sq)
            # sufficient statistic for the nuisance parameters
            T_minus_j = np.dot(linear_part, beta_unpenalized)
            c = np.dot(Sigma, eta) / sigma_eta_sq

            fixed_part = full_null + hessian_restricted.dot(T_minus_j)
            XXc = hessian_restricted.dot(c)

            if not X_scaled:
                fixed_part /= np.sqrt(n)
                hessian /= np.sqrt(n)
                hessian_restricted /= np.sqrt(n)
                XXc /= np.sqrt(n)

            def full_gradient(vec_state, fixed_part=fixed_part,
                              obs_residuals=obs_residuals,
                              eta=eta, lam=lam, epsilon=epsilon,
                              active=active, inactive=inactive):

                nactive = np.sum(active)
                ninactive = np.sum(inactive)

                alpha = vec_state[:n]
                betaE = vec_state[n:(n + nactive)]
                cube = vec_state[(n + nactive):]

                beta_full = np.zeros(p)
                beta_full[active] = betaE
                subgradient = np.zeros(p)
                subgradient[inactive] = lam * cube
                subgradient[active] = lam * signs

                opt_vec = epsilon * beta_full + subgradient

                beta_bar_j_boot = np.inner(mat[j, :], alpha)
                omega = (-fixed_part - XXc * beta_bar_j_boot
                         + np.dot(hessian_restricted, betaE) + opt_vec)

                if randomization_dist == "laplace":
                    # sign(w), w = grad + epsilon * beta + lambda * u
                    randomization_derivative = np.sign(omega) / randomization_scale
                if randomization_dist == "logistic":
                    omega_scaled = omega / randomization_scale
                    randomization_derivative = -(np.exp(-omega_scaled) - 1) / (np.exp(-omega_scaled) + 1)
                    randomization_derivative /= randomization_scale
                if randomization_dist == "normal":
                    randomization_derivative = omega / (randomization_scale ** 2)

                A = hessian + epsilon * np.identity(nactive + ninactive)
                A_restricted = A[:, active]

                _gradient = np.zeros(n + nactive + ninactive)

                # saturated model
                mat_q = np.outer(XXc, eta).dot(mat)
                _gradient[:n] = np.dot(mat_q.T, randomization_derivative)

                if weights == 'exponential':
                    _gradient[:n] -= np.ones(n)
                if weights == "normal":
                    _gradient[:n] -= alpha
                if weights == "gamma":
                    _gradient[:n] = 3. / (alpha + 2) - 2
                if weights == "gumbel":
                    gumbel_beta = np.sqrt(6) / (1.14 * np.pi)
                    euler = 0.57721
                    gumbel_mu = -gumbel_beta * euler
                    gumbel_sigma = 1. / 1.14
                    _gradient[:n] -= (1. - np.exp(-(alpha * gumbel_sigma - gumbel_mu) / gumbel_beta)) * gumbel_sigma / gumbel_beta
                if weights == "neutral":
                    _gradient[:n] -= (beta_bar_j_boot / sigma_eta_sq) * np.dot(mat.T, eta)

                _gradient[n:(n + nactive)] = -A_restricted.T.dot(randomization_derivative)
                _gradient[(n + nactive):] = -lam * randomization_derivative[inactive]

                # selected model (kept for reference):
                # _gradient[:nactive] = -(np.dot(Sigma_T_inv, data[:nactive]) + np.dot(hessian[:, active].T, sign_vec))
                # _gradient[ndata:(ndata + nactive)] = np.dot(A_restricted.T, sign_vec)
                # _gradient[(ndata + nactive):] = lam * sign_vec[inactive]

                return _gradient

            sampler = projected_langevin(vec_state.copy(),
                                         full_gradient,
                                         full_projection,
                                         step_size)
            samples = []

            for i in range(Langevin_steps):
                next(sampler)
                if (i > burning) and (i % 3 == 0):
                    samples.append(sampler.state.copy())

            samples = np.array(samples)
            alpha_samples = samples[:, :n]
            beta_samples = samples[:, n:(n + nactive)]

            beta_bars = [np.dot(XEpinv, np.diag(obs_residuals)).dot(alpha_samples[i, :].T)
                         for i in range(len(samples))]
            pop = [z[j] for z in beta_bars]
            obs = beta_unpenalized[j]

            fam = discrete_family(pop, np.ones_like(pop))
            pval = fam.cdf(0, obs)
            pval = 2 * min(pval, 1 - pval)
            print("observed: ", obs, "p value: ", pval)

            if idx in nonzero:
                alt.append(pval)
            else:
                null.append(pval)

    return null, alt
def pval(vec_state, full_projection, X, obs_residuals,
         beta_unpenalized, full_null, signs, lam, epsilon,
         nonzero, active, Sigma,
         weights, randomization_dist, randomization_scale,
         Langevin_steps, step_size, burning,
         X_scaled):
    """Compute a global test p-value based on the norm of the active block."""

    n, p = X.shape

    null = []
    alt = []
    pval = np.nan  # returned unchanged if the screening check below fails

    X_E = X[:, active]
    inactive = ~active
    nalpha = n

    nactive = np.sum(active)
    ninactive = np.sum(inactive)

    active_set = np.where(active)[0]
    print("true nonzero", nonzero, "active set", active_set)

    XEpinv = np.linalg.pinv(X[:, active])
    hessian = np.dot(X.T, X)
    hessian_restricted = hessian[:, active]

    mat = XEpinv.dot(np.diag(obs_residuals))
    SigmaE_inv = np.linalg.inv(Sigma[:nactive, :nactive])

    if set(nonzero).issubset(active_set):

        def full_gradient(vec_state, obs_residuals=obs_residuals,
                          lam=lam, epsilon=epsilon,
                          active=active, inactive=inactive):

            nactive = np.sum(active)
            ninactive = np.sum(inactive)

            alpha = vec_state[:n]
            betaE = vec_state[n:(n + nactive)]
            cube = vec_state[(n + nactive):]

            p = X.shape[1]
            beta_full = np.zeros(p)
            beta_full[active] = betaE
            subgradient = np.zeros(p)
            subgradient[inactive] = lam * cube
            subgradient[active] = lam * signs

            opt_vec = epsilon * beta_full + subgradient

            beta_bar_boot = mat.dot(alpha)
            omega = (-full_null - np.dot(hessian_restricted, beta_bar_boot)
                     + np.dot(hessian_restricted, betaE) + opt_vec)

            if randomization_dist == "laplace":
                # sign(w), w = grad + epsilon * beta + lambda * u
                randomization_derivative = np.sign(omega) / randomization_scale
            if randomization_dist == "logistic":
                randomization_derivative = -(np.exp(-omega) - 1) / (np.exp(-omega) + 1)

            A = hessian + epsilon * np.identity(nactive + ninactive)
            A_restricted = A[:, active]

            _gradient = np.zeros(n + nactive + ninactive)

            # saturated model
            mat_q = np.dot(hessian_restricted, mat)
            _gradient[:n] = np.dot(mat_q.T, randomization_derivative)

            if weights == 'exponential':
                _gradient[:n] -= np.ones(n)
            if weights == "normal":
                _gradient[:n] -= alpha
            if weights == "gamma":
                _gradient[:n] = 3. / (alpha + 2) - 2
            if weights == "gumbel":
                gumbel_beta = np.sqrt(6) / (1.14 * np.pi)
                euler = 0.57721
                gumbel_mu = -gumbel_beta * euler
                gumbel_sigma = 1. / 1.14
                _gradient[:n] -= (1. - np.exp(-(alpha * gumbel_sigma - gumbel_mu) / gumbel_beta)) * gumbel_sigma / gumbel_beta
            if weights == "neutral":
                _gradient[:n] -= np.dot(mat.T, np.dot(SigmaE_inv, beta_bar_boot))

            _gradient[n:(n + nactive)] = -A_restricted.T.dot(randomization_derivative)
            _gradient[(n + nactive):] = -lam * randomization_derivative[inactive]

            return _gradient

        sampler = projected_langevin(vec_state.copy(),
                                     full_gradient,
                                     full_projection,
                                     1. / p)
        samples = []

        for i in range(Langevin_steps):
            next(sampler)
            if i > burning:
                samples.append(sampler.state.copy())

        samples = np.array(samples)
        alpha_samples = samples[:, :n]

        beta_bars = [np.dot(XEpinv, np.diag(obs_residuals)).dot(alpha_samples[i, :].T)
                     for i in range(len(samples))]
        pop = [np.linalg.norm(z) for z in beta_bars]
        obs = np.linalg.norm(beta_unpenalized)

        fam = discrete_family(pop, np.ones_like(pop))
        pval = fam.cdf(0, obs)
        pval = 2 * min(pval, 1 - pval)
        print("observed: ", obs, "p value: ", pval)

    return [pval], [0]
def pval(vec_state, full_gradient, full_projection, move_data,
         bootstrap_samples, X, y, nonzero, active):
    """Compute null and alternative p-values using a bootstrapped data move."""

    n, p = X.shape

    y0 = y.copy()

    null = []
    alt = []

    X_E = X[:, active]
    ndata = y.shape[0]

    active_set = np.where(active)[0]
    print("true nonzero", nonzero, "active set", active_set)

    if set(nonzero).issubset(active_set):

        for j, idx in enumerate(active_set):
            eta = X[:, idx]

            keep = np.ones(p, dtype=bool)
            keep[idx] = False

            linear_part = X[:, keep].T
            P = np.dot(linear_part.T, np.linalg.pinv(linear_part).T)
            I = np.identity(linear_part.shape[1])
            R = I - P

            sampler = projected_langevin(vec_state.copy(),
                                         full_gradient,
                                         full_projection,
                                         1. / (2 * p))
            samples = []
            boot_samples = bootstrap_samples(y0, P, R)

            for _ in range(1000):
                next(sampler)
                new_data = move_data(sampler.state, boot_samples)
                sampler.state[:ndata] = new_data
                samples.append(sampler.state.copy())

            samples = np.array(samples)
            data_samples = samples[:, :n]

            pop = [np.dot(eta, z) for z in data_samples]
            obs = np.dot(eta, y0)

            fam = discrete_family(pop, np.ones_like(pop))
            pval = fam.cdf(0, obs)
            pval = 2 * min(pval, 1 - pval)
            print("observed: ", obs, "p value: ", pval)

            if idx in nonzero:
                alt.append(pval)
            else:
                null.append(pval)

    return null, alt
def test_data_carving_IC(n=600,
                         p=100,
                         s=10,
                         sigma=5,
                         rho=0.25,
                         signal=(3.5, 5.),
                         split_frac=0.9,
                         ndraw=25000,
                         burnin=5000,
                         df=np.inf,
                         coverage=0.90,
                         compute_intervals=False):

    X, y, beta, active, sigma, _ = gaussian_instance(n=n,
                                                     p=p,
                                                     s=s,
                                                     sigma=sigma,
                                                     rho=rho,
                                                     signal=signal,
                                                     df=df,
                                                     equicorrelated=False)
    mu = np.dot(X, beta)

    splitn = int(n * split_frac)
    indices = np.arange(n)
    np.random.shuffle(indices)
    stage_one = indices[:splitn]

    FS = info_crit_stop(y, X, sigma, cost=np.log(n), subset=stage_one)

    con = FS.constraints()

    X_E = X[:, FS.active]
    X_Ei = np.linalg.pinv(X_E)
    beta_bar = X_Ei.dot(y)
    mu_E = X_E.dot(beta_bar)
    sigma_E = np.linalg.norm(y - mu_E) / np.sqrt(n - len(FS.active))

    con.mean[:] = mu_E
    con.covariance = sigma_E**2 * np.identity(n)

    print(sigma_E, sigma)

    Z = sample_from_constraints(con, y, ndraw=ndraw, burnin=burnin)

    pvalues = []
    for idx, var in enumerate(FS.active):
        active = copy(FS.active)
        active.remove(var)
        X_r = X[:, active]  # restricted design
        mu_r = X_r.dot(np.linalg.pinv(X_r).dot(y))
        delta_mu = (mu_r - mu_E) / sigma_E**2
        # importance weights tilting the constrained Gaussian sample
        # from mean mu_E to the restricted mean mu_r
        W = np.exp(Z.dot(delta_mu))
        fam = discrete_family(Z.dot(X_Ei[idx].T), W)
        pval = fam.cdf(0, x=beta_bar[idx])
        pval = 2 * min(pval, 1 - pval)
        pvalues.append((pval, beta[var]))

    return pvalues
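# Hedged note (assumes only numpy, not part of the test suite): the weights
# W = exp(Z.dot(delta_mu)) above are importance weights for shifting a
# Gaussian mean, since the likelihood ratio of N(mu_r, S) against N(mu_E, S)
# at z is proportional to exp(z' S^{-1} (mu_r - mu_E)) up to a constant in z.
# A quick 1-d check that tilted averages match the shifted mean:

def _check_gaussian_tilt(delta=0.7, nsim=200000, seed=0):
    import numpy as np
    rng = np.random.default_rng(seed)
    z = rng.normal(loc=0., scale=1., size=nsim)   # draws from N(0, 1)
    w = np.exp(z * delta)                         # tilt toward N(delta, 1)
    return (w * z).sum() / w.sum()                # approximately delta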