def test_formula():
    """Check that the array and formula interfaces produce the same fit.

    The comparison is run once for ConditionalLogit and once for
    ConditionalPoisson, each time on the same seeded random data set.
    """
    formula = "y ~ 0 + x1 + x2"

    for model_class in (ConditionalLogit, ConditionalPoisson):

        # Re-seed on every pass so both model classes see identical data.
        np.random.seed(34234)
        nobs = 200
        endog = np.random.randint(0, 2, size=nobs)
        cov1 = np.random.normal(size=nobs)
        cov2 = np.random.normal(size=nobs)
        group_ids = np.random.randint(0, 25, size=nobs)

        # Fit through the array interface with a two-column design matrix.
        exog = np.column_stack((cov1, cov2))
        array_model = model_class(endog, exog, groups=group_ids)
        array_fit = array_model.fit()

        # Fit the same model through the formula interface; the group
        # labels are passed as the name of a column of the data frame.
        frame = pd.DataFrame(
            {"y": endog, "x1": cov1, "x2": cov2, "g": group_ids})
        formula_model = model_class.from_formula(
            formula, groups="g", data=frame)
        formula_fit = formula_model.fit()

        # Estimates and all derived inferential quantities must agree.
        assert_allclose(array_fit.params, formula_fit.params, rtol=1e-5)
        assert_allclose(array_fit.bse, formula_fit.bse, rtol=1e-5)
        assert_allclose(
            array_fit.cov_params(), formula_fit.cov_params(), rtol=1e-5)
        assert_allclose(array_fit.tvalues, formula_fit.tvalues, rtol=1e-5)
Exemplo n.º 2
0
def test_logit_1d():
    """Test ConditionalLogit with a single covariate on a tiny data set.

    Verifies the analytic gradients against numerical derivatives and
    compares the fitted estimate and standard error with Stata output.
    """
    endog = np.array([0, 1, 0, 1, 0, 1, 0, 1, 1, 1])
    group_ids = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2])

    # One covariate, stored as a column so the design matrix is 2-d.
    exog = np.array([0, 1, 0, 0, 1, 1, 0, 0, 1, 0])[:, None]

    model = ConditionalLogit(endog, exog, groups=group_ids)

    def denom_first(p):
        # Denominator term evaluated with first argument 0
        # (presumably the index of the first group).
        return model._denom(0, p)

    trial_values = (-1, 0, 1, 2)

    # Check the gradient for the denominator of the partial likelihood
    for value in trial_values:
        params = np.array([value])
        analytic = model._denom_grad(0, params)[1]
        numeric = approx_fprime(params, denom_first)
        assert_allclose(analytic, numeric)

    # Check the gradient for the loglikelihood
    for value in trial_values:
        numeric = approx_fprime(np.array([value]), model.loglike)
        analytic = model.score(np.array([value]))
        assert_allclose(numeric, analytic, rtol=1e-4)

    fit = model.fit()

    # Reference values from Stata
    stata_params = np.array([0.9272407])
    stata_bse = np.array([1.295155])
    assert_allclose(fit.params, stata_params, rtol=1e-5)
    assert_allclose(fit.bse, stata_bse, rtol=1e-5)
Exemplo n.º 3
0
def test_formula():
    """Check that the array and formula interfaces produce the same fit.

    The comparison is run once for ConditionalLogit and once for
    ConditionalPoisson, each time on the same seeded random data set.
    """
    formula = "y ~ 0 + x1 + x2"

    for model_class in (ConditionalLogit, ConditionalPoisson):

        # Re-seed on every pass so both model classes see identical data.
        np.random.seed(34234)
        nobs = 200
        endog = np.random.randint(0, 2, size=nobs)
        cov1 = np.random.normal(size=nobs)
        cov2 = np.random.normal(size=nobs)
        group_ids = np.random.randint(0, 25, size=nobs)

        # Fit through the array interface with a two-column design matrix.
        exog = np.column_stack((cov1, cov2))
        array_model = model_class(endog, exog, groups=group_ids)
        array_fit = array_model.fit()

        # Fit the same model through the formula interface; the group
        # labels are passed as the name of a column of the data frame.
        frame = pd.DataFrame(
            {"y": endog, "x1": cov1, "x2": cov2, "g": group_ids})
        formula_model = model_class.from_formula(
            formula, groups="g", data=frame)
        formula_fit = formula_model.fit()

        # Estimates and all derived inferential quantities must agree.
        assert_allclose(array_fit.params, formula_fit.params, rtol=1e-5)
        assert_allclose(array_fit.bse, formula_fit.bse, rtol=1e-5)
        assert_allclose(
            array_fit.cov_params(), formula_fit.cov_params(), rtol=1e-5)
        assert_allclose(array_fit.tvalues, formula_fit.tvalues, rtol=1e-5)
Exemplo n.º 4
0
def test_logit_2d():
    """Test ConditionalLogit with two covariates on a tiny data set.

    Verifies the analytic gradients against numerical derivatives,
    compares the fit with Stata output, and checks that the summary
    can be produced.
    """
    endog = np.array([0, 1, 0, 1, 0, 1, 0, 1, 1, 1])
    group_ids = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2])

    # Two binary covariates assembled into a floating point design matrix.
    col1 = np.array([0, 1, 0, 0, 1, 1, 0, 0, 1, 0])
    col2 = np.array([0, 0, 1, 0, 0, 1, 0, 1, 1, 1])
    exog = np.column_stack((col1, col2)).astype(float)

    model = ConditionalLogit(endog, exog, groups=group_ids)

    def denom_first(p):
        # Denominator term evaluated with first argument 0
        # (presumably the index of the first group).
        return model._denom(0, p)

    scales = (-1, 0, 1, 2)

    # Check the gradient for the denominator of the partial likelihood
    for scale in scales:
        params = np.array([scale, -1.5*scale])
        analytic = model._denom_grad(0, params)[1]
        numeric = approx_fprime(params, denom_first)
        assert_allclose(analytic, numeric, rtol=1e-5)

    # Check the gradient for the loglikelihood
    for scale in scales:
        params = np.array([-0.5*scale, 0.5*scale])
        numeric = approx_fprime(params, model.loglike)
        analytic = model.score(params)
        assert_allclose(numeric, analytic, rtol=1e-4)

    fit = model.fit()

    # Reference values from Stata
    stata_params = np.array([1.011074, 1.236758])
    stata_bse = np.array([1.420784, 1.361738])
    assert_allclose(fit.params, stata_params, rtol=1e-3)
    assert_allclose(fit.bse, stata_bse, rtol=1e-5)

    # Smoke test: building the summary must not raise.
    fit.summary()
Exemplo n.º 5
0
def conditional_logit_regression(x, y, groups, method="newton"):
    """Fit a ConditionalLogit model and return the fit with its reports.

    Parameters
    ----------
    x : design data passed to ConditionalLogit as the covariates
    y : response passed to ConditionalLogit
    groups : group labels, or a str key that is looked up in ``x``
    method : optimizer name forwarded to ``fit``

    Returns
    -------
    tuple of (fitted result, ``result.summary()``, ``get_odds_radio(result)``)

    NOTE(review): when ``groups`` is a str, ``x`` is still passed whole
    as the covariates, so the group column stays in the design --
    confirm this is intended.
    """
    # A string is treated as a key into x; anything else is used as-is.
    group_labels = x[groups] if isinstance(groups, str) else groups

    fitted = ConditionalLogit(y, x, groups=group_labels).fit(method=method)
    report = fitted.summary()

    # get_odds_radio is defined elsewhere (presumably odds ratios).
    return fitted, report, get_odds_radio(fitted)
Exemplo n.º 6
0
def test_lasso_logistic():
    """Test regularized fitting of ConditionalLogit on simulated data.

    Simulates 10 equally sized groups with random group effects and four
    covariates, of which only the third has a nonzero coefficient, then
    checks that:

    * a regularized fit with zero penalty matches the unpenalized fit,
    * the lasso fit reproduces a stored regression-test value,
    * the formula interface gives the same lasso fit as the array one.
    """

    np.random.seed(3423948)

    n = 200
    # Group labels 0..9, each repeated n // 10 times.
    groups = np.arange(10)
    groups = np.kron(groups, np.ones(n // 10))
    # One random effect per group, expanded to one value per observation.
    group_effects = np.random.normal(size=10)
    group_effects = np.kron(group_effects, np.ones(n // 10))

    x = np.random.normal(size=(n, 4))
    params = np.r_[0, 0, 1, 0]
    lin_pred = np.dot(x, params) + group_effects

    mean = 1 / (1 + np.exp(-lin_pred))
    # Use the builtin int: the np.int alias was deprecated in NumPy 1.20
    # and removed in NumPy 1.24.
    y = (np.random.uniform(size=n) < mean).astype(int)

    model0 = ConditionalLogit(y, x, groups=groups)
    result0 = model0.fit()

    # Should be the same as model0
    model1 = ConditionalLogit(y, x, groups=groups)
    result1 = model1.fit_regularized(L1_wt=0, alpha=0)

    assert_allclose(result0.params, result1.params, rtol=1e-3)

    model2 = ConditionalLogit(y, x, groups=groups)
    result2 = model2.fit_regularized(L1_wt=1, alpha=0.05)

    # Regression test
    assert_allclose(result2.params, np.r_[0, 0, 0.55235152, 0], rtol=1e-4)

    # Test with formula
    df = pd.DataFrame({
        "y": y,
        "x1": x[:, 0],
        "x2": x[:, 1],
        "x3": x[:, 2],
        "x4": x[:, 3],
        "groups": groups
    })
    fml = "y ~ 0 + x1 + x2 + x3 + x4"
    model3 = ConditionalLogit.from_formula(fml, groups="groups", data=df)
    result3 = model3.fit_regularized(L1_wt=1, alpha=0.05)
    assert_allclose(result2.params, result3.params)
def test_logit_1d():
    """Test ConditionalLogit with a single covariate on a tiny data set.

    Verifies the analytic gradients against numerical derivatives and
    compares the fitted estimate and standard error with Stata output.
    """
    endog = np.array([0, 1, 0, 1, 0, 1, 0, 1, 1, 1])
    group_ids = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2])

    # One covariate, stored as a column so the design matrix is 2-d.
    exog = np.array([0, 1, 0, 0, 1, 1, 0, 0, 1, 0])[:, None]

    model = ConditionalLogit(endog, exog, groups=group_ids)

    def denom_first(p):
        # Denominator term evaluated with first argument 0
        # (presumably the index of the first group).
        return model._denom(0, p)

    trial_values = (-1, 0, 1, 2)

    # Check the gradient for the denominator of the partial likelihood
    for value in trial_values:
        params = np.array([value])
        analytic = model._denom_grad(0, params)[1]
        numeric = approx_fprime(params, denom_first)
        assert_allclose(analytic, numeric)

    # Check the gradient for the loglikelihood
    for value in trial_values:
        numeric = approx_fprime(np.array([value]), model.loglike)
        analytic = model.score(np.array([value]))
        assert_allclose(numeric, analytic, rtol=1e-4)

    fit = model.fit()

    # Reference values from Stata
    stata_params = np.array([0.9272407])
    stata_bse = np.array([1.295155])
    assert_allclose(fit.params, stata_params, rtol=1e-5)
    assert_allclose(fit.bse, stata_bse, rtol=1e-5)
def test_logit_2d():
    """Test ConditionalLogit with two covariates on a tiny data set.

    Verifies the analytic gradients against numerical derivatives,
    compares the fit with Stata output, and checks that the summary
    can be produced.
    """
    endog = np.array([0, 1, 0, 1, 0, 1, 0, 1, 1, 1])
    group_ids = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2])

    # Two binary covariates assembled into a floating point design matrix.
    col1 = np.array([0, 1, 0, 0, 1, 1, 0, 0, 1, 0])
    col2 = np.array([0, 0, 1, 0, 0, 1, 0, 1, 1, 1])
    exog = np.column_stack((col1, col2)).astype(float)

    model = ConditionalLogit(endog, exog, groups=group_ids)

    def denom_first(p):
        # Denominator term evaluated with first argument 0
        # (presumably the index of the first group).
        return model._denom(0, p)

    scales = (-1, 0, 1, 2)

    # Check the gradient for the denominator of the partial likelihood
    for scale in scales:
        params = np.array([scale, -1.5*scale])
        analytic = model._denom_grad(0, params)[1]
        numeric = approx_fprime(params, denom_first)
        assert_allclose(analytic, numeric, rtol=1e-5)

    # Check the gradient for the loglikelihood
    for scale in scales:
        params = np.array([-0.5*scale, 0.5*scale])
        numeric = approx_fprime(params, model.loglike)
        analytic = model.score(params)
        assert_allclose(numeric, analytic, rtol=1e-4)

    fit = model.fit()

    # Reference values from Stata
    stata_params = np.array([1.011074, 1.236758])
    stata_bse = np.array([1.420784, 1.361738])
    assert_allclose(fit.params, stata_params, rtol=1e-3)
    assert_allclose(fit.bse, stata_bse, rtol=1e-5)

    # Smoke test: building the summary must not raise.
    fit.summary()
Exemplo n.º 9
0
def test_lasso_logistic():
    """Test regularized fitting of ConditionalLogit on simulated data.

    Simulates 10 equally sized groups with random group effects and four
    covariates, of which only the third has a nonzero coefficient, then
    checks that:

    * a regularized fit with zero penalty matches the unpenalized fit,
    * the lasso fit reproduces a stored regression-test value,
    * the formula interface gives the same lasso fit as the array one.
    """

    np.random.seed(3423948)

    n = 200
    # Group labels 0..9, each repeated n // 10 times.
    groups = np.arange(10)
    groups = np.kron(groups, np.ones(n // 10))
    # One random effect per group, expanded to one value per observation.
    group_effects = np.random.normal(size=10)
    group_effects = np.kron(group_effects, np.ones(n // 10))

    x = np.random.normal(size=(n, 4))
    params = np.r_[0, 0, 1, 0]
    lin_pred = np.dot(x, params) + group_effects

    mean = 1 / (1 + np.exp(-lin_pred))
    # Use the builtin int: the np.int alias was deprecated in NumPy 1.20
    # and removed in NumPy 1.24.
    y = (np.random.uniform(size=n) < mean).astype(int)

    model0 = ConditionalLogit(y, x, groups=groups)
    result0 = model0.fit()

    # Should be the same as model0
    model1 = ConditionalLogit(y, x, groups=groups)
    result1 = model1.fit_regularized(L1_wt=0, alpha=0)

    assert_allclose(result0.params, result1.params, rtol=1e-3)

    model2 = ConditionalLogit(y, x, groups=groups)
    result2 = model2.fit_regularized(L1_wt=1, alpha=0.05)

    # Regression test
    assert_allclose(result2.params, np.r_[0, 0, 0.55235152, 0], rtol=1e-4)

    # Test with formula
    df = pd.DataFrame({"y": y, "x1": x[:, 0], "x2": x[:, 1], "x3": x[:, 2],
                       "x4": x[:, 3], "groups": groups})
    fml = "y ~ 0 + x1 + x2 + x3 + x4"
    model3 = ConditionalLogit.from_formula(fml, groups="groups", data=df)
    result3 = model3.fit_regularized(L1_wt=1, alpha=0.05)
    assert_allclose(result2.params, result3.params)
Exemplo n.º 10
0
        temp = {'是': 1, "否": 0}
        for weizhi, f in enumerate(['降低生活水平', '付更高的税']):
            temp2 = [0, 0, 0]
            temp2[0] = zhi
            temp2[1] = temp[data_temp[f]]
            temp2[2] = weizhi
            tmp = tmp.append(
                pd.DataFrame([{
                    '记录': temp2[0],
                    'y': temp2[1],
                    "x": temp2[2]
                }]))

# Fit a conditional logistic regression on the expanded (long-format) data
# held in `tmp`, which has the columns '记录', 'y' and 'x'.

# Replace the index with a fresh one and drop the old index column that
# reset_index() adds.
tmp = tmp.reset_index()
del tmp['index']
# Optional export of the expanded data (disabled):
# tmp.to_csv(r"D:/书籍资料整理/属性数据分析/环保_展开.csv")
x = np.array(tmp['x'])
# Add a trailing axis so the single covariate forms a 2-d design matrix.
xx = x[:, None]
yy = np.array(tmp['y'])
# The '记录' column is used as the group label
# (presumably one record id per respondent -- TODO confirm).
g = np.array(tmp['记录'])

model = ConditionalLogit(yy, xx, groups=g)
# Author's note: fitting reports that 905 groups without within-group
# variance were dropped, i.e. groups whose values are all identical.

result = model.fit()
result.summary()
# Author's note: the book reports 23% because it computes the value
# directly from n, whereas the software gives 21%.
# R agrees with this software, so the difference was not investigated
# further.
# (3) Symmetric logistic: no implementation was found, and there are very
# few articles about it online.