def test_conditional():
    """
    Draw from a constraint conditioned on `C z = d` and check the draws.

    Builds a random constraint object from `A`, `b` with a random mean,
    conditions it on `k2` random linear equalities, finds a starting point
    that satisfies the equalities and both constraint objects, and samples.
    The draws must satisfy `C z = d` up to 1e-7 (in norm, over all draws),
    and fewer than 0.1% of them may violate `new_con`'s linear part/offset.
    """
    p = 200
    k1, k2 = 5, 3
    b = np.random.standard_normal((k1,))
    A = np.random.standard_normal((k1, p))
    con = AC.constraints(A, b)
    w = np.random.standard_normal(p)
    con.mean = w
    C = np.random.standard_normal((k2, p))
    d = np.random.standard_normal(k2)
    new_con = con.conditional(C, d)

    # The pseudo-inverse of C is the same for every attempt, so compute it
    # once instead of once per rejection-sampling iteration.
    C_pinv = np.linalg.pinv(C)
    while True:
        W = np.random.standard_normal(p)
        # Shift W so that C W = d, then keep it only if it is feasible.
        W -= np.dot(C_pinv, np.dot(C, W) - d)
        if new_con(W) and con(W):
            break

    Z = AC.sample_from_constraints(new_con, W, ndraw=5000)

    tol = 0
    nt.assert_true(np.linalg.norm(np.dot(Z, C.T) - d[None, :]) < 1.e-7)

    # Largest slack per draw; a positive value means a violated row.
    V = (np.dot(Z, new_con.linear_part.T) - new_con.offset[None, :]).max(1)
    V2 = (np.dot(Z, con.linear_part.T) - con.offset[None, :]).max(1)
    # V2 is only reported, not asserted on.
    print('failing:',
          (V > tol).sum(),
          (V2 > tol).sum(),
          np.linalg.norm(np.dot(C, W) - d))
    nt.assert_true(np.sum(V > tol) < 0.001 * V.shape[0])
def test_conditional_simple():
    """
    Compare conditional sampling against direct rejection sampling.

    The constraint is X1 + X2 <= 1 and the conditioning event is X2 = 2.
    The first coordinate of the sampler's draws is compared, through a
    two-sample z-type statistic on the means, with standard normal draws
    kept only when they fall below -1.
    """
    linear = np.ones((1, 2))
    offset = np.array([1])
    con = AC.constraints(linear, offset)  # X1 + X2 <= 1

    C = np.array([[0, 1]])
    d = np.array([2])  # X2 = 2
    new_con = con.conditional(C, d)

    # Starting point: shift a random vector onto C W = d, retry until
    # the original constraint accepts it.
    while True:
        W = np.random.standard_normal(2)
        W -= np.dot(np.linalg.pinv(C), np.dot(C, W) - d)
        if con(W):
            break

    Z1 = AC.sample_from_constraints(new_con, W, ndraw=10000)

    # Reference sample from the conditional distribution by rejection.
    reference = []
    while len(reference) < 10000:
        draw = np.random.standard_normal()
        if draw < -1:
            reference.append(draw)

    sampled = Z1[:, 0]
    reference = np.array(reference)
    mean_gap = sampled.mean() - reference.mean()
    test = np.fabs(mean_gap / (np.std(sampled) * np.sqrt(2)) * np.sqrt(10000))
    nt.assert_true(test < 5)
def test_conditional():
    """
    Draw from a constraint conditioned on `C z = d` and check the draws.

    A random constraint object (from `A`, `b`, with a random mean) is
    conditioned on `k2` random linear equalities. After locating a feasible
    starting point on `C z = d`, 5000 draws are taken. They must satisfy the
    equalities up to 1e-7 in norm, and fewer than 0.1% may violate
    `new_con`'s linear part/offset.
    """
    p = 200
    k1, k2 = 5, 3
    b = np.random.standard_normal((k1, ))
    A = np.random.standard_normal((k1, p))
    con = AC.constraints(A, b)
    w = np.random.standard_normal(p)
    con.mean = w
    C = np.random.standard_normal((k2, p))
    d = np.random.standard_normal(k2)
    new_con = con.conditional(C, d)

    # pinv(C) does not depend on the candidate point; computing it once
    # avoids repeating the decomposition on every rejected attempt.
    C_pinv = np.linalg.pinv(C)
    while True:
        W = np.random.standard_normal(p)
        # Move W onto the affine set C W = d before testing feasibility.
        W -= np.dot(C_pinv, np.dot(C, W) - d)
        if new_con(W) and con(W):
            break

    Z = AC.sample_from_constraints(new_con, W, ndraw=5000)

    tol = 0
    nt.assert_true(np.linalg.norm(np.dot(Z, C.T) - d[None, :]) < 1.e-7)

    # Row-wise maximum slack: positive means some inequality row is violated.
    V = (np.dot(Z, new_con.linear_part.T) - new_con.offset[None, :]).max(1)
    V2 = (np.dot(Z, con.linear_part.T) - con.offset[None, :]).max(1)
    # V2 is printed for diagnostics only; the assertion below uses V.
    print('failing:',
          (V > tol).sum(),
          (V2 > tol).sum(),
          np.linalg.norm(np.dot(C, W) - d))
    nt.assert_true(np.sum(V > tol) < 0.001 * V.shape[0])
def test_conditional_simple():
    """
    Two-dimensional sanity check of `conditional`.

    With the constraint X1 + X2 <= 1 and the condition X2 = 2, the first
    coordinate of the sampled points is compared with standard normal draws
    retained only when below -1; the standardized difference of the two
    sample means must be below 5.
    """
    ndraw = 10000

    con = AC.constraints(np.ones((1, 2)), np.array([1]))  # X1 + X2 <= 1
    C = np.array([[0, 1]])
    d = np.array([2])  # X2 = 2
    new_con = con.conditional(C, d)

    # Find a point with C W = d that the original constraint accepts.
    while True:
        W = np.random.standard_normal(2)
        residual = np.dot(C, W) - d
        W -= np.dot(np.linalg.pinv(C), residual)
        if con(W):
            break

    Z1 = AC.sample_from_constraints(new_con, W, ndraw=10000)

    # Rejection sampling from the conditional distribution.
    accepted = []
    while len(accepted) < ndraw:
        candidate = np.random.standard_normal()
        if candidate < -1:
            accepted.append(candidate)

    a1 = Z1[:, 0]
    a2 = np.array(accepted)
    test = np.fabs((a1.mean() - a2.mean()) / (np.std(a1) * np.sqrt(2)) * np.sqrt(10000))
    nt.assert_true(test < 5)
def test_chisq_central(nsim=None, burnin=8000, ndraw=2000):
    """
    Check that `chisq.quadratic_test` values look uniform on (0, 1).

    Samples from a random constraint with zero offset, evaluates the
    quadratic test on every tenth draw, and asserts that the mean and the
    standard deviation of the results are within 0.03 of 1/2 and
    1/sqrt(12) respectively.

    Parameters
    ----------
    nsim : unused
        Accepted for signature compatibility; not read by the test.
    burnin : int
        Passed to `AC.sample_from_constraints`.
    ndraw : int
        Passed to `AC.sample_from_constraints`.
    """
    n, p = 4, 10
    A, b = np.random.standard_normal((n, p)), np.zeros(n)
    con = AC.constraints(A, b)

    # Find a feasible starting point by rejection.
    while True:
        z = np.random.standard_normal(p)
        if con(z):
            break

    S = np.identity(p)[:3]
    Z = AC.sample_from_constraints(con, z, ndraw=ndraw, burnin=burnin)

    # Floor division: `range` rejects the float that `/` yields on Python 3.
    P = []
    for i in range(Z.shape[0] // 10):
        P.append(chisq.quadratic_test(Z[10 * i], S, con))

    # No plots in the test.
    nt.assert_true(np.fabs(np.mean(P) - 0.5) < 0.03)
    nt.assert_true(np.fabs(np.std(P) - 1 / np.sqrt(12)) < 0.03)
def test_chisq_central(nsim=None, burnin=8000, ndraw=2000):
    """
    Check the first two moments of `chisq.quadratic_test` results.

    A feasible point for a random constraint (zero offset) seeds the
    sampler; the quadratic test is evaluated on draws 0, 10, 20, ... and the
    mean / standard deviation of the results must lie within 0.03 of
    0.5 and 1/sqrt(12).

    Parameters
    ----------
    nsim : unused
        Kept in the signature; the body never reads it.
    burnin : int
        Forwarded to `AC.sample_from_constraints`.
    ndraw : int
        Forwarded to `AC.sample_from_constraints`.
    """
    n, p = 4, 10
    A, b = np.random.standard_normal((n, p)), np.zeros(n)
    con = AC.constraints(A, b)

    # Rejection loop for a starting point accepted by the constraint.
    while True:
        z = np.random.standard_normal(p)
        if con(z):
            break

    S = np.identity(p)[:3]
    Z = AC.sample_from_constraints(con, z, ndraw=ndraw, burnin=burnin)

    # `Z.shape[0] / 10` is a float on Python 3 and `range` would raise
    # TypeError; floor division keeps the count an integer.
    n_thinned = Z.shape[0] // 10
    P = [chisq.quadratic_test(Z[10 * i], S, con) for i in range(n_thinned)]

    # No plots in the test.
    nt.assert_true(np.fabs(np.mean(P) - 0.5) < 0.03)
    nt.assert_true(np.fabs(np.std(P) - 1 / np.sqrt(12)) < 0.03)
def test_sampling():
    """
    See that means and covariances are approximately correct.

    The offsets are all +inf, and the mean and covariance are set
    explicitly. The empirical mean and the empirical second moment minus
    the outer product of the empirical mean are compared with the
    configured values.
    """
    C = AC.constraints(np.identity(3), np.full(3, np.inf))
    C.mean = np.array([3, 4, 5.2])

    factor = np.random.standard_normal((5, 3))
    S = factor.T.dot(factor) / 30.
    C.covariance = S

    V = AC.sample_from_constraints(C, np.zeros(3), ndraw=500000)

    mean_error = np.linalg.norm(V.mean(0) - C.mean)
    nt.assert_true(mean_error < 0.01)

    # Per-draw outer products, averaged over draws.
    second_moment = np.einsum('ij,ik->ijk', V, V).mean(0)
    cov_error = np.linalg.norm(second_moment - np.outer(V.mean(0), V.mean(0)) - S)
    nt.assert_true(cov_error < 0.01)
def test_simulate_nonwhitened():
    """
    Sample with a covariance `X^T X` built from a 50 x 200 matrix.

    All draws must keep `W z` strictly below the offset of 1 in every row.
    """
    n_rows, n_cols = 50, 200
    X = np.random.standard_normal((n_rows, n_cols))
    cov = X.T.dot(X)

    W = np.random.standard_normal((3, n_cols))
    con = AC.constraints(W, np.ones(3), covariance=cov)

    # Redraw until the starting point satisfies W z <= 1.
    z = np.random.standard_normal(n_cols)
    while np.dot(W, z).max() > 1:
        z = np.random.standard_normal(n_cols)

    Z = AC.sample_from_constraints(con, z)
    worst_slack = (np.dot(Z, W.T) - 1).max()
    nt.assert_true(worst_slack < 0)
def test_simulate_nonwhitened():
    """
    Sample with a covariance `X^T X` built from a 50 x 200 matrix.

    With offset 3 in each of the three rows, every draw must satisfy
    `W z - 3 < 1e-5`.
    """
    n_rows, n_cols = 50, 200
    X = np.random.standard_normal((n_rows, n_cols))
    cov = X.T.dot(X)

    W = np.random.standard_normal((3, n_cols))
    con = AC.constraints(W, np.full(3, 3.0), covariance=cov)

    # Redraw until the starting point satisfies W z <= 3.
    z = np.random.standard_normal(n_cols)
    while np.dot(W, z).max() > 3:
        z = np.random.standard_normal(n_cols)

    Z = AC.sample_from_constraints(con, z, burnin=100, ndraw=100)
    worst_slack = (np.dot(Z, W.T) - 3).max()
    nt.assert_true(worst_slack < 1.e-5)
def test_chisq_noncentral(nsim=1000, burnin=2000, ndraw=8000):
    """
    Compute truncated noncentral chi-square pivots using R's `pchisq`.

    For `nsim` independent sampler runs, the last draw is taken, bounds
    are obtained from `con.bounds`, and the ratio
    `(F(U^2) - F(V^2)) / (F(U^2) - F(L^2))` is recorded, where `F` is the
    R function `pchisq(x, 3, ncp=fncp)`. Values outside (0, 1) are dropped.
    The function makes no assertion on the resulting values.

    Parameters
    ----------
    nsim : int
        Number of sampler runs.
    burnin, ndraw : int
        Forwarded to `AC.sample_from_constraints`.
    """
    mu = np.arange(6)
    ncp = np.linalg.norm(mu[:3])**2
    A, b = np.random.standard_normal((4, 6)), np.zeros(4)
    con = AC.constraints(A, b, mean=mu)

    # numpy2ri conversion is a process-wide switch; make sure it is turned
    # off again even if sampling or an R call raises.
    ro.numpy2ri.activate()
    try:
        ro.r('fncp=%f' % ncp)
        ro.r('f = function(x) {pchisq(x,3,ncp=fncp)}')

        def F(x):
            # '%f' formatting of an infinite value is not passed to R;
            # the CDF at +inf is returned directly as 1.
            if x != np.inf:
                return np.array(ro.r('f(%f)' % x))
            else:
                return np.array([1.])

        # find a feasible point
        while True:
            z = np.random.standard_normal(mu.shape)
            if con(z):
                break

        P = []
        for i in range(nsim):
            Z = AC.sample_from_constraints(con, z, ndraw=ndraw, burnin=burnin)
            # NOTE(review): if Z is an ndarray, `u` is a view of Z[-1], so
            # the normalisation below also rewrites Z[-1] before it is
            # passed to `con.bounds` -- confirm this aliasing is intended.
            u = Z[-1]
            u[:3] = u[:3] / np.linalg.norm(u[:3])
            L, V, U = con.bounds(u, Z[-1])[:3]

            # A non-positive lower bound is clipped to 0 before squaring.
            Ln = L**2 if L > 0 else 0
            Un = U**2
            Vn = V**2
            P.append(np.array((F(Un) - F(Vn)) / (F(Un) - F(Ln))))

        P = np.array(P).reshape(-1)
        P = P[P > 0]
        P = P[P < 1]
    finally:
        ro.numpy2ri.deactivate()
def test_chisq_central(nsim=None, burnin=8000, ndraw=2000):
    """
    Moment check for `chisq.quadratic_test` on thinned sampler output.

    Every tenth draw from the sampler is fed to the quadratic test; the
    mean and standard deviation of the results must be within 0.03 of
    0.5 and 1/sqrt(12). `nsim` is accepted but not used.
    """
    n_rows, n_cols = 4, 10
    A = np.random.standard_normal((n_rows, n_cols))
    b = np.zeros(n_rows)
    con = AC.constraints(A, b)

    # Rejection loop for a feasible starting point.
    while True:
        z = np.random.standard_normal(n_cols)
        if con(z):
            break

    S = np.identity(n_cols)[:3]
    Z = AC.sample_from_constraints(con, z, ndraw=ndraw, burnin=burnin)

    n_thinned = Z.shape[0] // 10
    P = [chisq.quadratic_test(Z[10 * k], S, con) for k in range(n_thinned)]

    mean_gap = np.fabs(np.mean(P) - 0.5)
    std_gap = np.fabs(np.std(P) - 1 / np.sqrt(12))
    nt.assert_true(mean_gap < 0.03)
    nt.assert_true(std_gap < 0.03)
def test_chisq_noncentral(nsim=1000, burnin=2000, ndraw=8000):
    """
    Compute truncated noncentral chi-square pivots using R's `pchisq`.

    Runs the sampler `nsim` times, takes bounds from `con.bounds` at the
    last draw of each run, and records
    `(F(U^2) - F(V^2)) / (F(U^2) - F(L^2))`, with `F` evaluated in R.
    Only values strictly between 0 and 1 are retained; nothing is asserted.
    """
    mu = np.arange(6)
    ncp = np.linalg.norm(mu[:3])**2
    A = np.random.standard_normal((4, 6))
    b = np.zeros(4)
    con = AC.constraints(A, b, mean=mu)

    ro.r('fncp=%f' % ncp)
    ro.r('f = function(x) {pchisq(x,3,ncp=fncp)}')

    def F(x):
        # +inf is answered directly instead of being formatted for R.
        if x == np.inf:
            return np.array([1.])
        return np.array(ro.r('f(%f)' % x))

    # find a feasible point
    while True:
        z = np.random.standard_normal(mu.shape)
        if con(z):
            break

    pivots = []
    for _ in range(nsim):
        Z = AC.sample_from_constraints(con, z, ndraw=ndraw, burnin=burnin)
        u = Z[-1]
        u[:3] = u[:3] / np.linalg.norm(u[:3])
        L, V, U = con.bounds(u, Z[-1])[:3]

        # Non-positive lower bounds are replaced by 0.
        Ln = L**2 if L > 0 else 0
        Un, Vn = U**2, V**2
        pivots.append(np.array((F(Un) - F(Vn)) / (F(Un) - F(Ln))))

    P = np.array(pivots).reshape(-1)
    P = P[P > 0]
    P = P[P < 1]
def test_pivots_intervals():
    """
    Smoke test: `pivot` and `interval` run without raising.

    The last sampled point and the coordinate direction with a 1 in
    position 4 are passed to `pivot` (default, 'less', 'greater') and to
    `interval` (UMAU on and off). Return values are not checked.
    """
    A = np.random.standard_normal((4, 30))
    b = np.random.standard_normal(4)
    con = AC.constraints(A, b)

    # Rejection loop for a feasible starting point.
    while True:
        w = np.random.standard_normal(30)
        if con(w):
            break

    Z = AC.sample_from_constraints(con, w)[-1]
    u = np.zeros(con.dim)
    u[4] = 1

    # call pivot
    con.pivot(u, Z)
    for alternative in ('less', 'greater'):
        con.pivot(u, Z, alternative=alternative)

    for use_umau in (True, False):
        con.interval(u, Z, UMAU=use_umau)
def test_pivots_intervals():
    """
    Exercise `con.pivot` and `con.interval` on one sampled point.

    Only checks that the calls complete: `pivot` with its default
    alternative, with 'less' and with 'greater', then `interval` with
    `UMAU=True` and `UMAU=False`.
    """
    dim = 30
    A = np.random.standard_normal((4, dim))
    b = np.random.standard_normal(4)
    con = AC.constraints(A, b)

    # Redraw until the constraint accepts the starting point.
    while True:
        w = np.random.standard_normal(dim)
        if con(w):
            break

    Z = AC.sample_from_constraints(con, w)[-1]

    direction = np.zeros(con.dim)
    direction[4] = 1

    # call pivot
    con.pivot(direction, Z)
    con.pivot(direction, Z, alternative='less')
    con.pivot(direction, Z, alternative='greater')

    for umau_flag in (True, False):
        con.interval(direction, Z, UMAU=umau_flag)
def test_data_carving_IC(n=600,
                         p=100,
                         s=10,
                         sigma=5,
                         rho=0.25,
                         signal=(3.5,5.),
                         split_frac=0.9,
                         ndraw=25000,
                         burnin=5000,
                         df=np.inf,
                         coverage=0.90,
                         compute_intervals=False):
    """
    Two-sided p-values for each variable selected by `info_crit_stop`.

    Selection runs on a random subset of `int(n * split_frac)` rows. The
    constraint returned by the selection is given the fitted mean and an
    isotropic covariance estimated from the residuals, and `y` seeds the
    sampler. For each selected variable the draws are reweighted towards
    the fit without that variable and a `discrete_family` CDF is evaluated
    at the observed coefficient.

    Returns
    -------
    list of (pvalue, beta[var]) tuples, one per entry of `FS.active`.

    `coverage` and `compute_intervals` are accepted but not read.
    """
    X, y, beta, active, sigma, _ = gaussian_instance(n=n,
                                                     p=p,
                                                     s=s,
                                                     sigma=sigma,
                                                     rho=rho,
                                                     signal=signal,
                                                     df=df,
                                                     equicorrelated=False)
    mu = np.dot(X, beta)  # not used below

    # Random split: the first `splitn` shuffled indices drive selection.
    splitn = int(n*split_frac)
    indices = np.arange(n)
    np.random.shuffle(indices)
    stage_one = indices[:splitn]

    FS = info_crit_stop(y, X, sigma, cost=np.log(n), subset=stage_one)
    con = FS.constraints()

    # Least-squares fit on the selected columns.
    X_E = X[:, FS.active]
    X_Ei = np.linalg.pinv(X_E)
    beta_bar = X_Ei.dot(y)
    mu_E = X_E.dot(beta_bar)
    residual_norm = np.linalg.norm(y - mu_E)
    sigma_E = residual_norm / np.sqrt(n - len(FS.active))

    con.mean[:] = mu_E
    con.covariance = sigma_E**2 * np.identity(n)
    print(sigma_E, sigma)

    Z = sample_from_constraints(con, y, ndraw=ndraw, burnin=burnin)

    pvalues = []
    for idx, var in enumerate(FS.active):
        # Selected set with `var` left out.
        active = copy(FS.active)
        active.remove(var)
        X_r = X[:, active]  # restricted design

        mu_r = X_r.dot(np.linalg.pinv(X_r).dot(y))
        delta_mu = (mu_r - mu_E) / sigma_E**2
        W = np.exp(Z.dot(delta_mu))

        fam = discrete_family(Z.dot(X_Ei[idx].T), W)
        one_sided = fam.cdf(0, x=beta_bar[idx])
        two_sided = 2 * min(one_sided, 1 - one_sided)
        pvalues.append((two_sided, beta[var]))

    return pvalues
def compute_sampler_quantiles(n=500, p=100, signal_fac=1.2, s=5, sigma=1., rho=0., randomizer_scale=1, full_dispersion=True):
    """
    Compare sampled selective-MLE quantiles with normal quantiles.

    Generates one instance with `gaussian_instance`, fits `lasso.gaussian`,
    builds a joint precision/covariance for (target, optimization)
    variables, samples from it under a linear constraint, recomputes
    `conv.selective_MLE` for every draw, shows normal probability plots and
    prints the 5%/95% quantiles next to `norm.ppf` quantiles centred at
    `true_mean`.

    The function prints and plots only; it has no return statement.

    NOTE(review): the original indentation of this block was lost; the
    nesting below (what sits inside the `while`, `if` and `for` bodies) is
    reconstructed and should be confirmed against the upstream file.
    """
    inst, const = gaussian_instance, lasso.gaussian
    signal = np.sqrt(signal_fac * 2 * np.log(p))

    # The body ends in an unconditional `break`, so it executes once.
    while True:
        X, Y, beta = inst(n=n, p=p, signal=signal, s=s, equicorrelated=False, rho=rho, sigma=sigma, random_signs=True)[:3]

        idx = np.arange(p)
        # Matrix with entries rho ** |i - j|.
        sigmaX = rho ** np.abs(np.subtract.outer(idx, idx))
        print("snr", beta.T.dot(sigmaX).dot(beta) / ((sigma ** 2.) * n))

        # Rebinds the parameters n and p to the actual design shape.
        n, p = X.shape

        # NOTE(review): `dispersion` and `sigma_` are assigned only in this
        # branch but are read unconditionally below, so calling with
        # full_dispersion=False appears to raise NameError -- confirm the
        # intended fallback.
        if full_dispersion:
            # Residual sum of squares of the full least-squares fit / (n - p).
            dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p)
            sigma_ = np.sqrt(dispersion)

        W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_

        conv = const(X, Y, W, randomizer_scale=randomizer_scale * sigma_)

        signs = conv.fit()
        nonzero = signs != 0

        (observed_target, cov_target, cov_target_score, alternatives) = selected_targets(conv.loglike, conv._W, nonzero, dispersion=dispersion)

        # Least-squares projection of the noiseless mean X beta onto the
        # selected columns.
        true_mean = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta))

        estimate, observed_info_mean, _, pval, intervals, _ = conv.selective_MLE(observed_target, cov_target, cov_target_score, alternatives)

        opt_linear, opt_offset = conv.opt_transform
        target_precision = np.linalg.inv(cov_target)
        randomizer_cov, randomizer_precision = conv.randomizer.cov_prec

        score_linear = np.identity(p)
        target_linear = score_linear.dot(cov_target_score.T.dot(target_precision))
        target_offset = conv.observed_score_state - target_linear.dot(observed_target)

        nopt = opt_linear.shape[1]
        ntarget = target_linear.shape[1]

        # Joint precision, filled block by block: target coordinates come
        # first, the remaining nopt coordinates second.
        implied_precision = np.zeros((ntarget + nopt, ntarget + nopt))
        implied_precision[:ntarget, :ntarget] = target_linear.T.dot(randomizer_precision).dot(target_linear) + target_precision
        implied_precision[:ntarget, ntarget:] = target_linear.T.dot(randomizer_precision).dot(opt_linear)
        implied_precision[ntarget:, :ntarget] = opt_linear.T.dot(randomizer_precision).dot(target_linear)
        implied_precision[ntarget:, ntarget:] = opt_linear.T.dot(randomizer_precision).dot(opt_linear)
        implied_cov = np.linalg.inv(implied_precision)

        conditioned_value = target_offset + opt_offset
        implied_mean = implied_cov.dot(np.hstack((target_precision.dot(true_mean)-target_linear.T.dot(randomizer_precision).dot(conditioned_value), -opt_linear.T.dot(randomizer_precision).dot(conditioned_value))))

        # Linear part is zero on the target coordinates and -I on the last
        # nopt coordinates, with zero offset.
        # presumably `constraints` encodes A x <= b, i.e. the last nopt
        # coordinates are kept non-negative -- verify against its definition.
        A_scaling = np.zeros((nopt, ntarget+nopt))
        A_scaling[:,ntarget:] = -np.identity(nopt)
        b_scaling = np.zeros(nopt)

        affine_con = constraints(A_scaling, b_scaling, mean=implied_mean, covariance=implied_cov)

        # Start at zero for the target part and at the observed state for
        # the remaining coordinates.
        initial_point = np.zeros(ntarget+nopt)
        initial_point[ntarget:] = conv.observed_opt_state

        sampler = sample_from_constraints(affine_con, initial_point, ndraw=500000, burnin=1000)

        print("sampler", sampler.shape, sampler[:,:ntarget].shape)

        # Re-run the selective MLE with each draw's target part in place of
        # the observed target; prints one line per draw.
        mle_sample = []
        for j in range(sampler.shape[0]):
            estimate, _, _, _, _, _ = conv.selective_MLE(sampler[j,:ntarget], cov_target, cov_target_score, alternatives)
            mle_sample.append(estimate)
            print("iteration ", j)

        mle_sample = np.asarray(mle_sample)
        print("check", mle_sample.shape, np.mean(mle_sample, axis=0) - true_mean)

        for i in range(nonzero.sum()):
            # Three-digit subplot code: 2 rows, 5 columns, panel 1 + i.
            # NOTE(review): codes beyond 259 no longer denote a 2x5 grid;
            # confirm nonzero.sum() stays small enough.
            temp = 251 + i
            ax = plt.subplot(temp)
            stats.probplot(mle_sample[:,i], dist="norm", plot=pylab)
        plt.subplots_adjust(hspace=.5, wspace=.5)
        pylab.show()

        # Empirical 5th/95th percentiles of the re-estimated MLEs.
        sampler_quantiles = np.vstack([np.percentile(mle_sample, 5, axis=0), np.percentile(mle_sample, 95, axis=0)])
        # Normal quantiles centred at true_mean, with scale taken from the
        # diagonal of observed_info_mean.
        normal_quantiles = np.vstack((norm.ppf(0.05, loc=true_mean, scale=np.sqrt(np.diag(observed_info_mean))), norm.ppf(0.95, loc=true_mean, scale=np.sqrt(np.diag(observed_info_mean)))))

        print("sampler quantiles", sampler_quantiles.T)
        print("normal quantiles", normal_quantiles.T)

        break