def test_conditional():
    """
    Draw from constraints conditioned on an equality ``C z = d`` and check
    that the draws respect the equality and (almost always) the
    inequalities of the conditional constraints.
    """
    p = 200
    k1, k2 = 5, 3

    # inequality constraints with a random mean vector
    offset = np.random.standard_normal((k1,))
    linear = np.random.standard_normal((k1, p))
    con = AC.constraints(linear, offset)
    con.mean = np.random.standard_normal(p)

    # equality constraints to condition on
    C = np.random.standard_normal((k2, p))
    d = np.random.standard_normal(k2)
    new_con = con.conditional(C, d)

    # starting point: shift a Gaussian draw onto {w : C w = d} and keep it
    # once both constraint objects accept it
    C_pinv = np.linalg.pinv(C)
    feasible = False
    while not feasible:
        W = np.random.standard_normal(p)
        W -= np.dot(C_pinv, np.dot(C, W) - d)
        feasible = new_con(W) and con(W)

    Z = AC.sample_from_constraints(new_con, W, ndraw=5000)

    tol = 0

    # all draws together must satisfy the equality up to rounding
    equality_gap = np.linalg.norm(np.dot(Z, C.T) - d[None, :])
    nt.assert_true(equality_gap < 1.e-7)

    # per-draw maximum of (linear_part z - offset); positive means violated
    V = (np.dot(Z, new_con.linear_part.T) - new_con.offset[None, :]).max(1)
    V2 = (np.dot(Z, con.linear_part.T) - con.offset[None, :]).max(1)
    n_bad_new = (V > tol).sum()
    n_bad_old = (V2 > tol).sum()
    print('failing:', n_bad_new, n_bad_old, np.linalg.norm(np.dot(C, W) - d))

    # fewer than 0.1% of the draws may violate the conditional constraints
    nt.assert_true(np.sum(V > tol) < 0.001 * V.shape[0])
# Example no. 2
# 0
def test_conditional_simple():
    """
    Two-dimensional check of conditioning: the first coordinate of draws
    from the conditional constraints is compared, via a standardized
    difference of means, with standard normal draws kept when below -1.
    """
    ndraw = 10000

    con = AC.constraints(np.ones((1, 2)), np.array([1]))  # X1+X2<= 1

    C = np.array([[0, 1]])
    d = np.array([2])  # X2=2
    new_con = con.conditional(C, d)

    # starting point on {C w = d} that the original constraints accept
    C_pinv = np.linalg.pinv(C)
    feasible = False
    while not feasible:
        W = np.random.standard_normal(2)
        W -= np.dot(C_pinv, np.dot(C, W) - d)
        feasible = con(W)
    Z1 = AC.sample_from_constraints(new_con, W, ndraw=ndraw)

    # reference sample obtained by rejection
    new_sample = []
    while len(new_sample) < ndraw:
        draw = np.random.standard_normal()  # conditional distribution
        if draw < -1:
            new_sample.append(draw)

    a1 = Z1[:, 0]
    a2 = np.array(new_sample)
    mean_gap = a1.mean() - a2.mean()
    scale = np.std(a1) * np.sqrt(2)
    test = np.fabs(mean_gap / scale * np.sqrt(ndraw))
    nt.assert_true(test < 5)
# Example no. 3
# 0
def test_conditional():
    """
    Condition random affine constraints on ``C z = d``, sample from the
    result, and verify the equality holds and that the conditional
    inequalities are violated by fewer than 0.1% of the draws.
    """
    p = 200
    k1, k2 = 5, 3

    # random inequality constraints and mean
    rhs = np.random.standard_normal((k1, ))
    lhs = np.random.standard_normal((k1, p))
    con = AC.constraints(lhs, rhs)
    con.mean = np.random.standard_normal(p)

    # random equality constraints
    C = np.random.standard_normal((k2, p))
    d = np.random.standard_normal(k2)
    new_con = con.conditional(C, d)

    # look for an initial point satisfying the equality that both
    # constraint objects accept
    pinv_C = np.linalg.pinv(C)
    accepted = False
    while not accepted:
        W = np.random.standard_normal(p)
        W -= np.dot(pinv_C, np.dot(C, W) - d)
        accepted = new_con(W) and con(W)

    Z = AC.sample_from_constraints(new_con, W, ndraw=5000)

    tol = 0

    residual = np.dot(Z, C.T) - d[None, :]
    nt.assert_true(np.linalg.norm(residual) < 1.e-7)

    # row-wise maximum of (linear_part z - offset); > tol is a violation
    V = (np.dot(Z, new_con.linear_part.T) - new_con.offset[None, :]).max(1)
    V2 = (np.dot(Z, con.linear_part.T) - con.offset[None, :]).max(1)
    violations = (V > tol).sum()
    violations_original = (V2 > tol).sum()
    print('failing:', violations, violations_original,
          np.linalg.norm(np.dot(C, W) - d))
    nt.assert_true(np.sum(V > tol) < 0.001 * V.shape[0])
def test_conditional_simple():
    """
    Compare draws from constraints conditioned on ``X2 = 2`` with a
    rejection sample of standard normals below -1, using a standardized
    difference of means.
    """
    n_keep = 10000

    A = np.ones((1, 2))
    b = np.array([1])
    con = AC.constraints(A, b)  # X1+X2<= 1

    C = np.array([[0, 1]])
    d = np.array([2])  # X2=2
    new_con = con.conditional(C, d)

    # initial point satisfying the equality and accepted by `con`
    pinv_C = np.linalg.pinv(C)
    accepted = False
    while not accepted:
        W = np.random.standard_normal(2)
        W -= np.dot(pinv_C, np.dot(C, W) - d)
        accepted = con(W)
    Z1 = AC.sample_from_constraints(new_con, W, ndraw=n_keep)

    # keep drawing until n_keep values below -1 have been collected
    new_sample = []
    while len(new_sample) < n_keep:
        candidate = np.random.standard_normal()  # conditional distribution
        if candidate < -1:
            new_sample.append(candidate)

    a1 = Z1[:, 0]
    a2 = np.array(new_sample)
    difference = a1.mean() - a2.mean()
    denominator = np.std(a1) * np.sqrt(2)
    test = np.fabs(difference / denominator * np.sqrt(n_keep))
    nt.assert_true(test < 5)
def test_chisq_central(nsim=None, burnin=8000, ndraw=2000):
    """
    Check that ``chisq.quadratic_test`` p-values have the mean and
    standard deviation of a Uniform(0, 1) variable.

    Parameters
    ----------
    nsim :
        Accepted but not used in this function.
    burnin, ndraw : int
        Forwarded to ``AC.sample_from_constraints``.
    """
    n, p = 4, 10
    A, b = np.random.standard_normal((n, p)), np.zeros(n)
    con = AC.constraints(A, b)

    # find a feasible starting point
    while True:
        z = np.random.standard_normal(p)
        if con(z):
            break

    S = np.identity(p)[:3]
    Z = AC.sample_from_constraints(con, z, ndraw=ndraw, burnin=burnin)

    # Test every 10th draw.  Floor division keeps the count an int; true
    # division yields a float on Python 3, which range() rejects.
    P = [chisq.quadratic_test(Z[10 * i], S, con)
         for i in range(Z.shape[0] // 10)]

    # 0.5 and 1/sqrt(12) are the mean and standard deviation of Uniform(0, 1)
    nt.assert_true(np.fabs(np.mean(P) - 0.5) < 0.03)
    nt.assert_true(np.fabs(np.std(P) - 1 / np.sqrt(12)) < 0.03)
def test_chisq_central(nsim=None, burnin=8000, ndraw=2000):
    """
    Run ``chisq.quadratic_test`` on thinned constrained draws and check
    the p-values against the first two moments of Uniform(0, 1).

    Parameters
    ----------
    nsim :
        Accepted but not used in this function.
    burnin, ndraw : int
        Forwarded to ``AC.sample_from_constraints``.
    """
    n, p = 4, 10
    A, b = np.random.standard_normal((n, p)), np.zeros(n)
    con = AC.constraints(A, b)

    # rejection-sample a point accepted by the constraints
    while True:
        z = np.random.standard_normal(p)
        if con(z):
            break

    S = np.identity(p)[:3]
    Z = AC.sample_from_constraints(con, z, ndraw=ndraw, burnin=burnin)

    # ``//`` rather than ``/``: range() needs an int, and ``/`` returns a
    # float on Python 3 (TypeError).  Every 10th draw is tested.
    n_tests = Z.shape[0] // 10
    P = [chisq.quadratic_test(Z[10 * i], S, con) for i in range(n_tests)]

    # mean 1/2 and standard deviation 1/sqrt(12), as for Uniform(0, 1)
    nt.assert_true(np.fabs(np.mean(P) - 0.5) < 0.03)
    nt.assert_true(np.fabs(np.std(P) - 1 / np.sqrt(12)) < 0.03)
def test_sampling():
    """
    See that means and covariances are approximately correct
    when every offset is infinite.
    """
    C = AC.constraints(np.identity(3), np.inf * np.ones(3))
    C.mean = np.array([3, 4, 5.2])

    # random covariance built from a 5 x 3 Gaussian matrix
    W = np.random.standard_normal((5, 3))
    S = np.dot(W.T, W) / 30.
    C.covariance = S

    V = AC.sample_from_constraints(C, np.zeros(3), ndraw=500000)

    sample_mean = V.mean(0)
    nt.assert_true(np.linalg.norm(sample_mean - C.mean) < 0.01)

    # empirical covariance: mean of outer products minus outer of the mean
    second_moment = np.einsum('ij,ik->ijk', V, V).mean(0)
    sample_cov = second_moment - np.outer(sample_mean, sample_mean)
    nt.assert_true(np.linalg.norm(sample_cov - S) < 0.01)
# Example no. 8
# 0
def test_sampling():
    """
    Sample with infinite offsets and compare the empirical mean and
    covariance of the draws with the ones set on the constraints.
    """
    target_mean = np.array([3, 4, 5.2])

    C = AC.constraints(np.identity(3), np.inf * np.ones(3))
    C.mean = target_mean

    W = np.random.standard_normal((5, 3))
    S = np.dot(W.T, W) / 30.
    C.covariance = S

    V = AC.sample_from_constraints(C, np.zeros(3), ndraw=500000)

    # first moment
    draw_mean = V.mean(0)
    nt.assert_true(np.linalg.norm(draw_mean - C.mean) < 0.01)

    # second moment: E[v v^T] - E[v] E[v]^T should be close to S
    outer_products = np.einsum('ij,ik->ijk', V, V)
    draw_cov = outer_products.mean(0) - np.outer(draw_mean, draw_mean)
    nt.assert_true(np.linalg.norm(draw_cov - S) < 0.01)
def test_simulate_nonwhitened():
    """
    Sample under a non-identity covariance and check that every draw
    stays strictly inside the constraints ``W z <= 1``.
    """
    n, p = 50, 200

    # covariance X^T X from a random n x p matrix
    X = np.random.standard_normal((n, p))
    cov = np.dot(X.T, X)

    W = np.random.standard_normal((3, p))
    con = AC.constraints(W, np.ones(3), covariance=cov)

    # rejection-sample a feasible starting point
    feasible = False
    while not feasible:
        z = np.random.standard_normal(p)
        feasible = np.dot(W, z).max() <= 1

    Z = AC.sample_from_constraints(con, z)

    worst_slack = (np.dot(Z, W.T) - 1).max()
    nt.assert_true(worst_slack < 0)
# Example no. 10
# 0
def test_simulate_nonwhitened():
    """
    Sample under a non-identity covariance and check that the draws
    satisfy ``W z <= 3`` up to a tolerance of 1e-5.
    """
    n, p = 50, 200
    bound = 3

    # covariance X^T X from a random n x p matrix
    X = np.random.standard_normal((n, p))
    cov = np.dot(X.T, X)

    W = np.random.standard_normal((3, p))
    con = AC.constraints(W, bound * np.ones(3), covariance=cov)

    # rejection-sample a feasible starting point
    feasible = False
    while not feasible:
        z = np.random.standard_normal(p)
        feasible = np.dot(W, z).max() <= bound

    Z = AC.sample_from_constraints(con, z, burnin=100, ndraw=100)

    worst_slack = (np.dot(Z, W.T) - bound).max()
    nt.assert_true(worst_slack < 1.e-5)
# Example no. 11
# 0
def test_chisq_noncentral(nsim=1000, burnin=2000, ndraw=8000):
    """
    Compute pivots from the noncentral chi-square CDF (evaluated in R via
    ``pchisq``) on the last draw of ``nsim`` constrained samples.

    Parameters
    ----------
    nsim : int
        Number of independent sampler runs.
    burnin, ndraw : int
        Forwarded to ``AC.sample_from_constraints``.

    Notes
    -----
    NOTE(review): the filtered pivots ``P`` are computed but never
    asserted on, so this only checks that the computation runs.
    """
    mu = np.arange(6)
    ncp = np.linalg.norm(mu[:3])**2

    A, b = np.random.standard_normal((4, 6)), np.zeros(4)
    con = AC.constraints(A, b, mean=mu)

    ro.numpy2ri.activate()
    try:
        ro.r('fncp=%f' % ncp)
        ro.r('f = function(x) {pchisq(x,3,ncp=fncp)}')

        def F(x):
            # the CDF at +inf is 1; avoid formatting inf into the R call
            if x != np.inf:
                return np.array(ro.r('f(%f)' % x))
            else:
                return np.array([1.])

        # find a feasible point
        while True:
            z = np.random.standard_normal(mu.shape)
            if con(z):
                break

        P = []
        for _ in range(nsim):
            Z = AC.sample_from_constraints(con, z, ndraw=ndraw, burnin=burnin)
            # NOTE(review): `u` is a view of Z[-1], so the normalization
            # below also modifies Z[-1] before it is passed to bounds();
            # confirm whether a copy was intended.
            u = Z[-1]
            u[:3] = u[:3] / np.linalg.norm(u[:3])
            L, V, U = con.bounds(u, Z[-1])[:3]

            # a non-positive lower bound is replaced by 0 before squaring
            Ln = L**2 if L > 0 else 0
            Un = U**2
            Vn = V**2

            P.append(np.array((F(Un) - F(Vn)) / (F(Un) - F(Ln))))

        P = np.array(P).reshape(-1)
        P = P[P > 0]
        P = P[P < 1]
    finally:
        # always undo the conversion activation, even if a step above fails
        ro.numpy2ri.deactivate()
# Example no. 12
# 0
def test_chisq_central(nsim=None, burnin=8000, ndraw=2000):
    """
    Check that ``chisq.quadratic_test`` p-values, computed on every 10th
    constrained draw, have mean near 1/2 and standard deviation near
    1/sqrt(12).

    ``nsim`` is accepted but not used; ``burnin`` and ``ndraw`` are
    forwarded to ``AC.sample_from_constraints``.
    """
    n, p = 4, 10
    A, b = np.random.standard_normal((n, p)), np.zeros(n)
    con = AC.constraints(A, b)

    # rejection-sample a feasible starting point
    feasible = False
    while not feasible:
        z = np.random.standard_normal(p)
        feasible = con(z)

    S = np.identity(p)[:3]
    Z = AC.sample_from_constraints(con, z, ndraw=ndraw, burnin=burnin)

    # rows 0, 10, 20, ... of the chain
    n_tests = int(Z.shape[0] / 10)
    P = [chisq.quadratic_test(Z[row], S, con)
         for row in range(0, 10 * n_tests, 10)]

    mean_error = np.fabs(np.mean(P) - 0.5)
    std_error = np.fabs(np.std(P) - 1 / np.sqrt(12))
    nt.assert_true(mean_error < 0.03)
    nt.assert_true(std_error < 0.03)
def test_chisq_noncentral(nsim=1000, burnin=2000, ndraw=8000):
    """
    Compute pivots from the noncentral chi-square CDF (evaluated in R via
    ``pchisq``) on the last draw of ``nsim`` constrained samples.

    ``burnin`` and ``ndraw`` are forwarded to
    ``AC.sample_from_constraints``.  The resulting pivots are filtered to
    the open interval (0, 1) but not asserted on.
    """
    mu = np.arange(6)
    ncp = np.linalg.norm(mu[:3])**2

    A, b = np.random.standard_normal((4, 6)), np.zeros(4)
    con = AC.constraints(A, b, mean=mu)

    # define the CDF on the R side
    ro.r('fncp=%f' % ncp)
    ro.r('f = function(x) {pchisq(x,3,ncp=fncp)}')

    def F(x):
        # the CDF at +inf is 1; everything else is evaluated in R
        if x == np.inf:
            return np.array([1.])
        return np.array(ro.r('f(%f)' % x))

    # find a feasible point
    feasible = False
    while not feasible:
        z = np.random.standard_normal(mu.shape)
        feasible = con(z)

    P = []
    for _ in range(nsim):
        Z = AC.sample_from_constraints(con, z, ndraw=ndraw, burnin=burnin)
        # `u` is a view of the last draw; its first three entries are
        # normalized in place
        u = Z[-1]
        u[:3] = u[:3] / np.linalg.norm(u[:3])
        L, V, U = con.bounds(u, Z[-1])[:3]

        # a non-positive lower bound is replaced by 0 before squaring
        Ln = L**2 if L > 0 else 0
        Un = U**2
        Vn = V**2

        upper, observed, lower = F(Un), F(Vn), F(Ln)
        P.append(np.array((upper - observed) / (upper - lower)))

    P = np.array(P).reshape(-1)
    P = P[(P > 0) & (P < 1)]
# Example no. 14
# 0
def test_pivots_intervals():
    """
    Smoke test: call ``pivot`` and ``interval`` on a constrained draw with
    each option; nothing is asserted about the returned values.
    """
    A, b = np.random.standard_normal((4, 30)), np.random.standard_normal(4)
    con = AC.constraints(A, b)

    # rejection-sample a feasible starting point
    feasible = False
    while not feasible:
        w = np.random.standard_normal(30)
        feasible = con(w)

    # last draw of the sampler
    Z = AC.sample_from_constraints(con, w)[-1]

    # direction with a single 1 at index 4
    u = np.zeros(con.dim)
    u[4] = 1

    # call pivot
    con.pivot(u, Z)
    for alternative in ('less', 'greater'):
        con.pivot(u, Z, alternative=alternative)

    for use_umau in (True, False):
        con.interval(u, Z, UMAU=use_umau)
def test_pivots_intervals():
    """
    Exercise ``con.pivot`` (default, 'less', 'greater') and
    ``con.interval`` (UMAU on and off) on one constrained draw.  The
    results are not checked; the test only verifies the calls run.
    """
    linear = np.random.standard_normal((4, 30))
    offset = np.random.standard_normal(4)
    con = AC.constraints(linear, offset)

    # draw until the constraints accept the point
    accepted = False
    while not accepted:
        w = np.random.standard_normal(30)
        accepted = con(w)

    draws = AC.sample_from_constraints(con, w)
    Z = draws[-1]

    # direction with a single 1 at index 4
    u = np.zeros(con.dim)
    u[4] = 1

    # call pivot
    con.pivot(u, Z)
    for side in ('less', 'greater'):
        con.pivot(u, Z, alternative=side)

    for umau in (True, False):
        con.interval(u, Z, UMAU=umau)
# Example no. 16
# 0
def test_data_carving_IC(n=600,
                         p=100,
                         s=10,
                         sigma=5,
                         rho=0.25,
                         signal=(3.5,5.),
                         split_frac=0.9,
                         ndraw=25000,
                         burnin=5000, 
                         df=np.inf,
                         coverage=0.90,
                         compute_intervals=False):
    """
    Compute two-sided p-values for the variables selected by
    ``info_crit_stop`` on a random subset of the rows, using draws from
    the selection constraints.

    Parameters
    ----------
    n, p, s, sigma, rho, signal, df :
        Forwarded to ``gaussian_instance``.
    split_frac :
        ``int(n * split_frac)`` randomly chosen rows are passed to
        ``info_crit_stop`` as ``subset``.
    ndraw, burnin :
        Forwarded to ``sample_from_constraints``.
    coverage, compute_intervals :
        Accepted but not used in this function.

    Returns
    -------
    pvalues : list of tuple
        One ``(pval, beta[var])`` pair per variable in ``FS.active``.
    """
    # the 4th and 6th return values are not needed here
    X, y, beta, _, sigma, _ = gaussian_instance(n=n,
                                                p=p,
                                                s=s,
                                                sigma=sigma,
                                                rho=rho,
                                                signal=signal,
                                                df=df,
                                                equicorrelated=False)

    # random subset of rows used for the selection stage
    splitn = int(n*split_frac)
    indices = np.arange(n)
    np.random.shuffle(indices)
    stage_one = indices[:splitn]

    FS = info_crit_stop(y, X, sigma, cost=np.log(n), subset=stage_one)

    con = FS.constraints()

    # least-squares fit on the selected columns and residual scale
    X_E = X[:, FS.active]
    X_Ei = np.linalg.pinv(X_E)
    beta_bar = X_Ei.dot(y)
    mu_E = X_E.dot(beta_bar)
    sigma_E = np.linalg.norm(y - mu_E) / np.sqrt(n - len(FS.active))
    var_E = sigma_E**2

    con.mean[:] = mu_E
    con.covariance = var_E * np.identity(n)

    print(sigma_E, sigma)
    Z = sample_from_constraints(con,
                                y,
                                ndraw=ndraw,
                                burnin=burnin)

    pvalues = []
    for idx, var in enumerate(FS.active):
        # refit with `var` dropped from the selected set
        active = copy(FS.active)
        active.remove(var)
        X_r = X[:, active]  # restricted design
        mu_r = X_r.dot(np.linalg.pinv(X_r).dot(y))
        delta_mu = (mu_r - mu_E) / var_E

        # weights on the draws, shifting the mean from mu_E to mu_r
        W = np.exp(Z.dot(delta_mu))
        fam = discrete_family(Z.dot(X_Ei[idx].T), W)
        pval = fam.cdf(0, x=beta_bar[idx])
        pval = 2 * min(pval, 1 - pval)
        pvalues.append((pval, beta[var]))

    return pvalues
def compute_sampler_quantiles(n=500, p=100, signal_fac=1.2, s=5, sigma=1., rho=0., randomizer_scale=1, full_dispersion=True):
    """
    Compare quantiles of the selective MLE, evaluated over draws from the
    implied Gaussian restricted by affine constraints, with normal
    quantiles centred at the true mean of the selected model.

    Prints diagnostics, shows normal probability plots of the MLE sample
    and prints both sets of 5%/95% quantiles.  Returns None.

    Parameters
    ----------
    n, p, s, sigma, rho :
        Forwarded to ``gaussian_instance``.
    signal_fac :
        The signal is ``sqrt(signal_fac * 2 * log(p))``.
    randomizer_scale :
        Multiplied by the estimated noise scale and passed to
        ``lasso.gaussian``.
    full_dispersion : bool
        If true, the dispersion is estimated from the residuals of the
        full least-squares fit (requires n > p).  Otherwise ``None`` is
        passed as the dispersion and ``np.std(Y)`` is used as the noise
        scale.
    """
    inst, const = gaussian_instance, lasso.gaussian
    signal = np.sqrt(signal_fac * 2 * np.log(p))

    while True:
        X, Y, beta = inst(n=n,
                          p=p,
                          signal=signal,
                          s=s,
                          equicorrelated=False,
                          rho=rho,
                          sigma=sigma,
                          random_signs=True)[:3]

        idx = np.arange(p)
        sigmaX = rho ** np.abs(np.subtract.outer(idx, idx))
        print("snr", beta.T.dot(sigmaX).dot(beta) / ((sigma ** 2.) * n))

        n, p = X.shape

        if full_dispersion:
            dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p)
            sigma_ = np.sqrt(dispersion)
        else:
            # Previously both names were left undefined on this path
            # (NameError).  NOTE(review): assumes selected_targets accepts
            # dispersion=None and estimates it itself - confirm.
            dispersion = None
            sigma_ = np.std(Y)
        W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_

        conv = const(X,
                     Y,
                     W,
                     randomizer_scale=randomizer_scale * sigma_)

        signs = conv.fit()
        nonzero = signs != 0
        if not nonzero.any():
            # nothing was selected, so there is no target; draw a new instance
            continue

        (observed_target,
         cov_target,
         cov_target_score,
         alternatives) = selected_targets(conv.loglike,
                                          conv._W,
                                          nonzero,
                                          dispersion=dispersion)

        true_mean = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta))
        # only the second return value is used below
        _, observed_info_mean, _, _, _, _ = conv.selective_MLE(observed_target,
                                                               cov_target,
                                                               cov_target_score,
                                                               alternatives)

        opt_linear, opt_offset = conv.opt_transform
        target_precision = np.linalg.inv(cov_target)
        randomizer_cov, randomizer_precision = conv.randomizer.cov_prec
        score_linear = np.identity(p)
        target_linear = score_linear.dot(cov_target_score.T.dot(target_precision))
        target_offset = conv.observed_score_state - target_linear.dot(observed_target)

        nopt = opt_linear.shape[1]
        ntarget = target_linear.shape[1]

        # joint precision of (target, optimization variables), by blocks
        implied_precision = np.zeros((ntarget + nopt, ntarget + nopt))
        implied_precision[:ntarget, :ntarget] = target_linear.T.dot(randomizer_precision).dot(target_linear) + target_precision
        implied_precision[:ntarget, ntarget:] = target_linear.T.dot(randomizer_precision).dot(opt_linear)
        implied_precision[ntarget:, :ntarget] = opt_linear.T.dot(randomizer_precision).dot(target_linear)
        implied_precision[ntarget:, ntarget:] = opt_linear.T.dot(randomizer_precision).dot(opt_linear)
        implied_cov = np.linalg.inv(implied_precision)

        conditioned_value = target_offset + opt_offset
        implied_mean = implied_cov.dot(np.hstack((target_precision.dot(true_mean)-target_linear.T.dot(randomizer_precision).dot(conditioned_value),
                                                  -opt_linear.T.dot(randomizer_precision).dot(conditioned_value))))

        # constrain only the optimization block: -I applied to it with a
        # zero offset
        A_scaling = np.zeros((nopt, ntarget+nopt))
        A_scaling[:,ntarget:] = -np.identity(nopt)
        b_scaling = np.zeros(nopt)
        affine_con = constraints(A_scaling,
                                 b_scaling,
                                 mean=implied_mean,
                                 covariance=implied_cov)

        initial_point = np.zeros(ntarget+nopt)
        initial_point[ntarget:] = conv.observed_opt_state

        sampler = sample_from_constraints(affine_con,
                                          initial_point,
                                          ndraw=500000,
                                          burnin=1000)

        print("sampler", sampler.shape, sampler[:,:ntarget].shape)

        # selective MLE evaluated at the target block of every draw
        mle_sample = []
        for j in range(sampler.shape[0]):
            estimate, _, _, _, _, _ = conv.selective_MLE(sampler[j,:ntarget],
                                                         cov_target,
                                                         cov_target_score,
                                                         alternatives)
            mle_sample.append(estimate)
            print("iteration ", j)
        mle_sample = np.asarray(mle_sample)
        print("check", mle_sample.shape, np.mean(mle_sample, axis=0) - true_mean)

        # One probability plot per selected variable on a grid with 5
        # columns.  The three-argument subplot form stays valid beyond 9
        # panels, where the integer form (251 + i) does not.
        nplots = int(nonzero.sum())
        ncols = 5
        nrows = max(2, -(-nplots // ncols))
        for i in range(nplots):
            plt.subplot(nrows, ncols, i + 1)
            stats.probplot(mle_sample[:,i], dist="norm", plot=pylab)
        plt.subplots_adjust(hspace=.5, wspace=.5)
        pylab.show()

        sampler_quantiles = np.vstack([np.percentile(mle_sample, 5, axis=0), np.percentile(mle_sample, 95, axis=0)])

        normal_scale = np.sqrt(np.diag(observed_info_mean))
        normal_quantiles = np.vstack((norm.ppf(0.05, loc=true_mean, scale=normal_scale),
                                      norm.ppf(0.95, loc=true_mean, scale=normal_scale)))

        print("sampler quantiles", sampler_quantiles.T)
        print("normal quantiles", normal_quantiles.T)
        break