Example #1
# Imports assumed by the examples below.
import numpy as np
import pandas as pd
import pytest
from numpy.testing import assert_allclose, assert_equal

from statsmodels.regression.dimred import PHD, SAVE, SlicedInverseReg
from statsmodels.tools.numdiff import approx_fprime


def test_sir_regularized_numdiff():
    # Use numeric gradients to check the analytic gradient
    # of the regularized SIR objective function.

    np.random.seed(93482)

    n = 1000
    p = 10
    xmat = np.random.normal(size=(n, p))
    y1 = np.dot(xmat, np.linspace(-1, 1, p))
    y2 = xmat.sum(1)
    y = y2 / (1 + y1**2) + np.random.normal(size=n)
    model = SlicedInverseReg(y, xmat)
    _ = model.fit()

    # Second difference penalty matrix.
    fmat = np.zeros((p - 2, p))
    for i in range(p - 2):
        fmat[i, i:i + 3] = [1, -2, 1]
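    # Equivalent one-line construction (illustrative note, same result):
    #   fmat = np.diff(np.eye(p), 2, axis=0)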

    # Calling fit_regularized sets up the internal objective and gradient
    # that are probed directly below.
    _ = model.fit_regularized(2, 3 * fmat)

    # Compare the gradients to the numerical derivatives
    for _ in range(5):
        pa = np.random.normal(size=(p, 2))
        pa, _, _ = np.linalg.svd(pa, 0)
        gn = approx_fprime(pa.ravel(), model._regularized_objective, 1e-7)
        gr = model._regularized_grad(pa.ravel())
        assert_allclose(gn, gr, atol=1e-5, rtol=1e-4)
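
For reference, approx_fprime from statsmodels.tools.numdiff approximates the gradient of a function by finite differences (forward differences by default). A minimal standalone sketch on a function with a known gradient (illustrative only, not part of the test suite):

import numpy as np
from statsmodels.tools.numdiff import approx_fprime

f = lambda z: np.sum(z ** 2)         # analytic gradient is 2 * z
z0 = np.array([1.0, -2.0, 3.0])
g_num = approx_fprime(z0, f, 1e-7)   # numerical finite-difference gradient
assert np.allclose(g_num, 2 * z0, atol=1e-5)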
Example #2
def test_sir_regularized_2d():
    # Compare regularized SIR to traditional SIR when there is no penalty.
    # The two procedures should agree exactly.

    np.random.seed(93482)

    n = 1000
    p = 10
    xmat = np.random.normal(size=(n, p))
    y1 = np.dot(xmat[:, 0:4], np.r_[1, 1, -1, -1])
    y2 = np.dot(xmat[:, 4:8], np.r_[1, 1, -1, -1])
    y = y1 + np.arctan(y2) + np.random.normal(size=n)
    model = SlicedInverseReg(y, xmat)
    rslt1 = model.fit()

    # A zero penalty matrix, so the regularized fit is effectively unpenalized.
    fmat = np.zeros((1, p))

    for d in 1, 2, 3, 4:
        if d < 3:
            rslt2 = model.fit_regularized(d, fmat)
        else:
            with pytest.warns(UserWarning, match="SIR.fit_regularized did"):
                rslt2 = model.fit_regularized(d, fmat)
        pa1 = rslt1.params[:, 0:d]
        pa1, _, _ = np.linalg.svd(pa1, 0)
        pa2 = rslt2.params
        _, s, _ = np.linalg.svd(np.dot(pa1.T, pa2))
        assert_allclose(np.sum(s), d, atol=1e-1, rtol=1e-1)
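
The assertion above relies on a standard fact: for orthonormal bases pa1 and pa2, the singular values of pa1.T @ pa2 are the cosines of the principal angles between the spanned subspaces, so they are all 1 (summing to d) exactly when the subspaces coincide. A small self-contained sketch of that identity (illustrative only):

import numpy as np

rng = np.random.default_rng(0)
b = rng.normal(size=(10, 2))
q1, _, _ = np.linalg.svd(b, 0)        # orthonormal basis of span(b)
q2, _, _ = np.linalg.svd(b @ [[2.0, 1.0], [0.0, 3.0]], 0)  # same span, new basis
_, s, _ = np.linalg.svd(q1.T @ q2)
assert np.allclose(s, 1)              # all principal-angle cosines are 1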
Example #3
def test_sir_regularized_1d():
    # Compare regularized SIR to traditional SIR, in a setting where the
    # regularization is compatible with the true parameters (i.e. there
    # is no regularization bias).

    np.random.seed(93482)

    n = 1000
    p = 10
    xmat = np.random.normal(size=(n, p))
    y = np.dot(xmat[:, 0:4], np.r_[1, 1, -1, -1]) + np.random.normal(size=n)
    model = SlicedInverseReg(y, xmat)
    rslt = model.fit()

    # The penalty drives p[0] ~ p[1] and p[2] ~ p[3]
    fmat = np.zeros((2, p))
    fmat[0, 0:2] = [1, -1]
    fmat[1, 2:4] = [1, -1]

    rslt2 = model.fit_regularized(1, 3 * fmat)

    pa0 = np.zeros(p)
    pa0[0:4] = [1, 1, -1, -1]
    pa1 = rslt.params[:, 0]
    pa2 = rslt2.params[:, 0:2]

    # Compare two 1d subspaces
    def sim(x, y):
        x = x / np.sqrt(np.sum(x * x))
        y = y / np.sqrt(np.sum(y * y))
        return 1 - np.abs(np.dot(x, y))

    # Regularized SIR should be closer to the truth than traditional SIR
    assert_equal(sim(pa0, pa1) > sim(pa0, pa2), True)

    # Regularized SIR should be close to the truth
    assert_equal(sim(pa0, pa2) < 1e-3, True)

    # Regularized SIR should have a smaller penalty value than traditional SIR
    assert_equal(
        np.sum(np.dot(fmat, pa1)**2) > np.sum(np.dot(fmat, pa2)**2), True)
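
The sim helper above measures 1 - |cos(angle)| between two directions, so it is 0 for collinear vectors (regardless of sign or scale) and 1 for orthogonal ones. A quick self-contained check (illustrative only):

import numpy as np

def sim(x, y):
    x = x / np.sqrt(np.sum(x * x))
    y = y / np.sqrt(np.sum(y * y))
    return 1 - np.abs(np.dot(x, y))

x = np.array([1.0, 1.0, -1.0, -1.0])
assert sim(x, 2 * x) < 1e-12          # collinear: distance 0
assert sim(x, -x) < 1e-12             # sign is ignored
assert abs(sim(x, np.array([1.0, -1.0, 1.0, -1.0])) - 1) < 1e-12  # orthogonal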
Example #4
def test_poisson():

    np.random.seed(43242)

    # Generate a non-orthogonal design matrix
    xmat = np.random.normal(size=(500, 5))
    xmat[:, 1] = 0.5 * xmat[:, 0] + np.sqrt(1 - 0.5**2) * xmat[:, 1]
    xmat[:, 3] = 0.5 * xmat[:, 2] + np.sqrt(1 - 0.5**2) * xmat[:, 3]

    b = np.r_[0, 1, -1, 0, 0.5]
    lpr = np.dot(xmat, b)
    ev = np.exp(lpr)
    y = np.random.poisson(ev)

    for method in range(6):

        if method == 0:
            model = SlicedInverseReg(y, xmat)
            rslt = model.fit()
        elif method == 1:
            model = SAVE(y, xmat)
            rslt = model.fit(slice_n=100)
        elif method == 2:
            model = SAVE(y, xmat, bc=True)
            rslt = model.fit(slice_n=100)
        elif method == 3:
            df = pd.DataFrame({
                "y": y,
                "x0": xmat[:, 0],
                "x1": xmat[:, 1],
                "x2": xmat[:, 2],
                "x3": xmat[:, 3],
                "x4": xmat[:, 4]
            })
            model = SlicedInverseReg.from_formula(
                "y ~ 0 + x0 + x1 + x2 + x3 + x4", data=df)
            rslt = model.fit()
        elif method == 4:
            model = PHD(y, xmat)
            rslt = model.fit()
        elif method == 5:
            model = PHD(y, xmat)
            rslt = model.fit(resid=True)

        # Check for concentration in one direction (this is
        # a single index model)
        assert_equal(np.abs(rslt.eigs[0] / rslt.eigs[1]) > 5, True)

        # Check that the estimated direction aligns with the true
        # direction
        params = np.asarray(rslt.params)
        q = np.dot(params[:, 0], b)
        q /= np.sqrt(np.sum(params[:, 0]**2))
        q /= np.sqrt(np.sum(b**2))
        assert_equal(np.abs(q) > 0.95, True)
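
The final block computes the cosine similarity q between the leading estimated direction and the true coefficient vector b; |q| near 1 indicates near-perfect alignment up to sign and scale. An equivalent standalone helper (illustrative, with a hypothetical name):

import numpy as np

def cosine(u, v):
    # |cosine| near 1 means the two directions are nearly collinear
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))

u = np.r_[0, 1, -1, 0, 0.5]
assert abs(cosine(u, -2 * u)) > 0.95  # collinear up to sign and scale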