Пример #1
0
    def setup_class(cls):
        """Fit three OLS regressions of investment growth for the tests.

        Growth rates are 400 times the first difference of the logged
        series; ``realint`` and ``tbilrate`` enter lagged by one period.
        The fitted results are stored on ``cls.res``, ``cls.res2`` and
        ``cls.res3``, the cleaned frame on ``cls.d``, and the first model's
        arrays on ``cls.endog`` / ``cls.exog``.
        """
        frame = macrodata.load_pandas().data
        # growth rates
        log_inv = np.log(frame['realinv'])
        log_gdp = np.log(frame['realgdp'])
        frame['gs_l_realinv'] = 400 * log_inv.diff()
        frame['gs_l_realgdp'] = 400 * log_gdp.diff()
        # lag both rate columns by one period
        frame['lint'] = frame['realint'].shift(1)
        frame['tbilrate'] = frame['tbilrate'].shift(1)

        # diff() and shift(1) introduce missing values; drop those rows
        frame = frame.dropna()
        cls.d = frame

        response = frame['gs_l_realinv']
        regressor_sets = (
            ['gs_l_realgdp', 'lint'],
            ['gs_l_realgdp', 'tbilrate'],
            ['gs_l_realgdp'],
        )
        designs = [add_constant(frame[cols]) for cols in regressor_sets]
        cls.res, cls.res2, cls.res3 = [OLS(response, design).fit()
                                       for design in designs]

        # expose the arrays of the first model
        cls.endog = cls.res.model.endog
        cls.exog = cls.res.model.exog
Пример #2
0
    def test_add_constant_has_constant2darray(self):
        """Exercise every ``has_constant`` mode on a 2-D array whose first
        column is already all ones."""
        with_ones = np.array([[1, 1, 1, 1], [1, 2, 3, 4.]]).transpose()

        # 'skip' must return the data unchanged
        skipped = tools.add_constant(with_ones, has_constant='skip')
        assert_equal(with_ones, skipped)

        # 'raise' must reject the existing constant column
        with pytest.raises(ValueError):
            tools.add_constant(with_ones, has_constant='raise')

        # 'add' must prepend another column of ones regardless
        added = tools.add_constant(with_ones, has_constant='add')
        expected = np.column_stack((np.ones(4), with_ones))
        assert_equal(added, expected)
Пример #3
0
    def test_add_constant_has_constant1darray(self):
        """Exercise every ``has_constant`` mode on a 1-D vector of ones."""
        # 'skip' only reshapes the vector into a single column
        column = tools.add_constant(np.ones(5), has_constant='skip')
        assert_equal(column, np.ones((5, 1)))

        # 'raise' must reject the existing constant column
        with pytest.raises(ValueError):
            tools.add_constant(column, has_constant='raise')

        # 'add' yields a second column of ones
        doubled = tools.add_constant(column, has_constant='add')
        assert_equal(doubled, np.ones((5, 2)))
Пример #4
0
def test_wls_tss():
    """Check that the centered TSS of OLS and of the matching WLS agree.

    The OLS sample repeats each row of the WLS sample as many times as the
    corresponding weight (2, 1 and 3).
    """
    endog_full = np.array([22, 22, 22, 23, 23, 23])
    exog_full = [[1, 0], [1, 0], [1, 1], [0, 1], [0, 1], [0, 1]]
    ols_res = OLS(endog_full, add_constant(exog_full, prepend=False)).fit()

    endog_uniq = np.array([22, 22, 23.])
    exog_uniq = [[1, 0], [1, 1], [0, 1]]
    counts = np.array([2, 1, 3.])
    wls_design = add_constant(exog_uniq, prepend=False)
    wls_res = WLS(endog_uniq, wls_design, weights=counts).fit()

    assert_equal(ols_res.centered_tss, wls_res.centered_tss)
Пример #5
0
def test_poisson_residuals():
    """Compare residuals of a Poisson GLM with ``exposure`` against the
    same model fitted on rates (counts / exposure) with ``var_weights``."""
    nobs, k_exog = 100, 5
    np.random.seed(987125)
    design = add_constant(np.random.randn(nobs, k_exog - 1))

    linpred = design.sum(axis=1) / 2
    # `y` is not used below; the draw still advances the random state, so
    # it has to stay for the Poisson sample to be unchanged
    y = linpred + 2 * np.random.randn(nobs)
    exposure = 1 + np.arange(nobs) // 4

    counts = np.random.poisson(np.exp(linpred) * exposure)
    # inflate five observations
    counts[10:15] += 10

    poisson = sm.families.Poisson()
    res_exposure = GLM(counts, design, family=poisson,
                       exposure=exposure).fit()
    res_weights = GLM(counts / exposure, design, family=poisson,
                      var_weights=exposure).fit()

    # response residuals differ by the exposure scaling
    assert_allclose(res_exposure.resid_response / exposure,
                    res_weights.resid_response)
    # these residual types must match directly
    for kind in ('resid_pearson', 'resid_deviance', 'resid_anscombe'):
        assert_allclose(getattr(res_exposure, kind),
                        getattr(res_weights, kind))
    assert_allclose(res_exposure.resid_anscombe_unscaled,
                    res_weights.resid_anscombe)
Пример #6
0
 def setup_class(cls):
     """Load the Filardo data set and delegate to the parent ``setup_class``.

     The CSV is read from ``results/mar_filardo.csv`` and given a
     month-start date index. ``dlip`` without its first row is the
     endogenous series, and ``dmdlleading`` without its last row (plus a
     constant) is passed as ``exog_tvtp``.
     """
     csv_file = os.path.join(current_path, 'results', 'mar_filardo.csv')
     frame = pd.read_csv(csv_file)
     frame.index = pd.date_range('1948-02-01', '1991-04-01', freq='MS')
     cls.mar_filardo = frame

     # reference values handed to the parent setup
     expected_params = np.r_[4.35941747, -1.6493936, 1.7702123, 0.9945672,
                             0.517298, -0.865888,
                             np.exp(-0.362469)**2, 0.189474, 0.079344,
                             0.110944, 0.122251]
     true = dict(params=expected_params,
                 llf=-586.5718,
                 llf_fit=-586.5718,
                 llf_fit_em=-586.5718)

     endog = frame['dlip'].iloc[1:]
     exog_tvtp = add_constant(frame['dmdlleading'].iloc[:-1])
     super(TestFilardoPandas, cls).setup_class(
         true, endog, k_regimes=2, order=4, switching_ar=False,
         exog_tvtp=exog_tvtp)
Пример #7
0
 def setup_class(cls):
     """Store a t-test of a single contrast on the Longley OLS fit.

     The contrast has +1 at position 4, -1 at position 5 and zeros
     elsewhere; the result is kept on ``cls.Ttest1``.
     """
     longley_data = datasets.longley.load(as_pandas=False)
     longley_data.exog = add_constant(longley_data.exog, prepend=False)
     fitted = OLS(longley_data.endog, longley_data.exog).fit()

     contrast = np.zeros(7)
     contrast[4:6] = [1, -1]
     cls.Ttest1 = fitted.t_test(contrast)
Пример #8
0
 def test_pandas_const_series_prepend(self):
     """Check that ``prepend=True`` puts the constant first for a Series.

     The ``GNP`` column of the Longley exog is passed as a Series; the
     result must have ``'const'`` as its first column and that column
     must have zero variance.
     """
     # Check that the constant is added in the expected column location
     dta = longley.load_pandas()
     series = dta.exog['GNP']
     series = tools.add_constant(series, prepend=True)
     assert_string_equal('const', series.columns[0])
     # The variances come back indexed by column label, so select the
     # first one by position with .iloc; integer [] on a label-indexed
     # Series relies on a deprecated positional fallback.
     assert_equal(series.var(0).iloc[0], 0)
Пример #9
0
def test_pandas_const_df_prepend():
    """Check that ``prepend=True`` puts the constant first for a DataFrame.

    ``UNEMP`` is rescaled by its standard deviation before the constant is
    added; the result must have ``'const'`` as its first column and that
    column must have zero variance.
    """
    # GH#1025
    dta = longley.load_pandas().exog
    dta['UNEMP'] /= dta['UNEMP'].std()
    dta = tools.add_constant(dta, prepend=True)
    assert_string_equal('const', dta.columns[0])
    # The variances come back indexed by column label, so select the
    # first one by position with .iloc; integer [] on a label-indexed
    # Series relies on a deprecated positional fallback.
    assert_equal(dta.var(0).iloc[0], 0)
Пример #10
0
 def test_missing(self):
     """Check that ``missing='drop'`` leaves 13 rows in endog and exog
     after three endog entries are set to NaN."""
     dataset = datasets.longley.load(as_pandas=False)
     dataset.exog = add_constant(dataset.exog, prepend=False)
     # knock out three observations
     dataset.endog[[3, 7, 14]] = np.nan

     model = OLS(dataset.endog, dataset.exog, missing='drop')
     n_endog = model.endog.shape[0]
     n_exog = model.exog.shape[0]
     assert n_endog == 13
     assert n_exog == 13
Пример #11
0
 def setup_class(cls):
     """Run the same per-coefficient t-tests in matrix and string form.

     ``cls.Ttest`` uses a 7x7 identity restriction matrix, and
     ``cls.NewTTest`` uses the string hypothesis naming each coefficient.
     Both are computed from the Longley OLS fit stored on ``cls.res1``.
     """
     longley_data = datasets.longley.load(as_pandas=False)
     longley_data.exog = add_constant(longley_data.exog, prepend=False)
     cls.res1 = OLS(longley_data.endog, longley_data.exog).fit()

     # matrix form: one row per coefficient
     cls.Ttest = cls.res1.t_test(np.identity(7))
     # string form of the hypotheses
     hypotheses = 'x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0, x6 = 0, const = 0'
     cls.NewTTest = cls.res1.t_test(hypotheses)
Пример #12
0
    def setup_class(cls):
        """Fit OLS of investment growth on GDP growth and ``realint``.

        Growth rates are 400 times the first difference of the logged
        series; ``realint`` drops its last value so the lengths match.
        The result is stored on ``cls.res1``.
        """
        macro = macrodata.load_pandas().data
        log_gdp = np.log(macro['realgdp'].values)
        log_inv = np.log(macro['realinv'].values)
        gdp_growth = 400 * np.diff(log_gdp)
        inv_growth = 400 * np.diff(log_inv)

        rate = macro['realint'][:-1].values
        design = add_constant(np.c_[gdp_growth, rate], prepend=False)

        cls.res1 = OLS(inv_growth, design).fit()
Пример #13
0
def test_const_indicator():
    """Check that a full indicator set with ``hasconst=True`` gives the
    same R-squared as a constant plus all but the first indicator."""
    np.random.seed(12345)
    labels = np.random.randint(0, 3, size=30)
    dummies = categorical(labels, drop=True)
    signal = np.dot(dummies, [1., 2., 3.])
    response = signal + np.random.normal(size=30)

    # explicit constant, first indicator column left out
    with_const = add_constant(dummies[:, 1:], prepend=True)
    res_const = OLS(response, with_const).fit()
    # full indicator set, constant declared as implied
    res_dummies = OLS(response, dummies, hasconst=True).fit()

    assert_almost_equal(res_const.rsquared, res_dummies.rsquared, 12)
Пример #14
0
 def test_add_constant_dataframe(self):
     """Check ``add_constant`` on a DataFrame with mixed column types.

     The result must contain a float ``const`` column of ones and must
     equal the input with that column inserted at position 0.
     """
     frame = pd.DataFrame([[1.0, 'a', 4], [2.0, 'bc', 9], [3.0, 'def', 16]])
     result = tools.add_constant(frame)

     # the new column on its own
     ones = pd.Series([1.0, 1.0, 1.0], name='const')
     tm.assert_series_equal(ones, result['const'])

     # the whole frame, built by hand
     by_hand = frame.copy()
     by_hand.insert(0, 'const', np.ones(3))
     tm.assert_frame_equal(by_hand, result)
Пример #15
0
 def setup_class(cls):
     """Run one joint F-test in matrix and in string form.

     ``cls.Ftest1`` uses a two-row restriction matrix, and
     ``cls.NewFtest1`` uses the string hypothesis ``'x2 = x3, x5 = x6'``.
     Both are computed from the Longley OLS fit.
     """
     longley_data = datasets.longley.load(as_pandas=False)
     longley_data.exog = add_constant(longley_data.exog, prepend=False)
     fitted = OLS(longley_data.endog, longley_data.exog).fit()

     # matrix form of the two restrictions
     restrictions = [[0, 1, -1, 0, 0, 0, 0],
                     [0, 0, 0, 0, 1, -1, 0]]
     cls.Ftest1 = fitted.f_test(restrictions)
     # string form
     hypotheses = 'x2 = x3, x5 = x6'
     cls.NewFtest1 = fitted.f_test(hypotheses)
Пример #16
0
    def setup_class(cls):
        """Fit a GLSAR model of investment growth with five iterations.

        Growth rates are 400 times the first difference of the logged
        series; ``realint`` drops its last value so the lengths match.
        The result of ``iterative_fit(5)`` is stored on ``cls.res``.
        """
        macro = macrodata.load_pandas().data
        log_gdp = np.log(macro['realgdp'].values)
        log_inv = np.log(macro['realinv'].values)
        gdp_growth = 400 * np.diff(log_gdp)
        inv_growth = 400 * np.diff(log_inv)

        rate = macro['realint'][:-1].values
        design = add_constant(np.c_[gdp_growth, rate], prepend=False)

        glsar_model = GLSAR(inv_growth, design, 1)
        cls.res = glsar_model.iterative_fit(5)
Пример #17
0
    def setup_class(cls):
        """Fit WLS on the Longley data with scalar and with list weights.

        ``cls.res1`` uses the scalar weight 1/3 and ``cls.res2`` uses a
        list holding 1/3 once per observation.
        """
        longley_data = datasets.longley.load(as_pandas=False)
        longley_data.exog = add_constant(longley_data.exog, prepend=True)
        endog, exog = longley_data.endog, longley_data.exog

        # scalar weight
        cls.res1 = WLS(endog, exog, weights=1. / 3).fit()

        # the same weight spelled out per observation
        per_obs = [1 / 3.] * len(endog)
        cls.res2 = WLS(endog, exog, weights=per_obs).fit()
Пример #18
0
 def setup_class(cls):
     """Fit a Binomial GLM on star98 in two equivalent-looking forms.

     ``cls.res1`` uses the two-column endog as loaded. ``cls.res2`` uses
     the first column divided by the row sums, with the row sums passed
     as ``var_weights``.
     """
     star = sm.datasets.star98.load(as_pandas=False)
     star.exog = add_constant(star.exog, prepend=False)
     cls.res1 = GLM(star.endog, star.exog,
                    family=sm.families.Binomial()).fit()

     # row totals of the two endog columns
     totals = star.endog.sum(axis=1)
     proportion = star.endog[:, 0] / totals
     cls.res2 = GLM(proportion, star.exog,
                    family=sm.families.Binomial(),
                    var_weights=totals).fit()
Пример #19
0
def test_wls_example():
    """Check the WLS docstring example against reference values from R."""
    # example from the docstring, there was a note about a bug, should
    # be fixed now
    response = [1, 3, 4, 5, 2, 3, 4]
    design = add_constant(list(range(1, 8)), prepend=False)
    weights = list(range(1, 8))
    fitted = WLS(response, design, weights=weights).fit()

    # taken from R lm.summary
    assert_almost_equal(fitted.fvalue, 0.127337843215, 6)
    assert_almost_equal(fitted.scale, 2.44608530786**2, 6)
Пример #20
0
 def setup_class(cls):
     """Store an F-test given as an ``(R, q)`` pair on the Longley OLS fit.

     ``R`` has five restriction rows over seven coefficients and ``q`` is
     the matching vector of right-hand-side values; the result is kept on
     ``cls.Ftest1``.
     """
     longley_data = datasets.longley.load(as_pandas=False)
     longley_data.exog = add_constant(longley_data.exog, prepend=False)
     fitted = OLS(longley_data.endog, longley_data.exog).fit()

     restriction_rows = [[0, 1, 1, 0, 0, 0, 0],
                         [0, 1, 0, 1, 0, 0, 0],
                         [0, 1, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 1, 0, 0],
                         [0, 0, 0, 0, 0, 1, 0]]
     R = np.array(restriction_rows)
     q = np.array([0, 0, 0, 1, 0])
     cls.Ftest1 = fitted.f_test((R, q))
Пример #21
0
 def setup_class(cls):
     """Fit OLS and default GLS on the Longley data for comparison.

     ``cls.res1`` is the GLS fit and ``cls.res2`` the OLS fit.
     ``cls.res3`` is an unfitted GLS model built with ``sigma=1``.
     """
     longley_data = datasets.longley.load(as_pandas=False)
     longley_data.exog = add_constant(longley_data.exog, prepend=False)
     endog, exog = longley_data.endog, longley_data.exog

     ols_fit = OLS(endog, exog).fit()
     gls_fit = GLS(endog, exog).fit()
     # model only, never fitted
     gls_scalar_model = GLS(endog, exog, sigma=1)

     cls.endog = endog
     cls.exog = exog
     cls.res1 = gls_fit
     cls.res2 = ols_fit
     cls.res3 = gls_scalar_model  # TODO: Do something with this?
Пример #22
0
 def test_add_constant_recarray(self):
     """Check ``add_constant`` on a recarray with mixed field types.

     The result must expose a ``const`` field of ones and keep the dtype
     of every original field.
     """
     field_types = np.dtype([('', int), ('', '<S4'),
                             ('', np.float32), ('', np.float64)])
     rows = [(1, 'abcd', 1.0, 2.0),
             (7, 'abcd', 2.0, 4.0),
             (21, 'abcd', 2.0, 8.0)]
     records = np.array(rows, field_types).view(np.recarray)

     augmented = tools.add_constant(records)
     assert_equal(augmented['const'], np.array([1.0, 1.0, 1.0]))
     # every original field keeps its dtype
     for field in records.dtype.fields:
         assert augmented[field].dtype == records[field].dtype
Пример #23
0
 def setup_class(cls):
     """Collect four Longley fits that all use constant weights.

     ``cls.results`` holds, in order: OLS, WLS with unit weights, GLS with
     a vector of 100s as sigma, and GLS with a diagonal matrix of 0.1.
     """
     longley_data = datasets.longley.load(as_pandas=False)
     longley_data.exog = add_constant(longley_data.exog, prepend=False)
     endog, exog = longley_data.endog, longley_data.exog
     unit = np.ones(endog.shape[0])

     cls.results = [
         OLS(endog, exog).fit(),
         WLS(endog, exog, unit).fit(),
         GLS(endog, exog, 100 * unit).fit(),
         GLS(endog, exog, np.diag(0.1 * unit)).fit(),
     ]
Пример #24
0
    def setup_class(cls):
        """Build a full and a reduced Binomial GLM with an offset of ones.

        ``cls.mod1`` uses every exog column and ``cls.mod2`` drops the
        last five. Neither model is fitted here; ``cls.init()`` is called
        at the end.
        """
        from sm2.datasets.star98 import load

        star = load(as_pandas=False)
        full_exog = add_constant(star.exog, prepend=True)
        unit_offset = np.ones(len(star.endog))

        # reduced model: all but the last five columns
        reduced_exog = full_exog[:, :-5]
        cls.mod2 = GLM(star.endog, reduced_exog, family=family.Binomial(),
                       offset=unit_offset)

        cls.mod1 = GLM(star.endog, full_exog, family=family.Binomial(),
                       offset=unit_offset)
        cls.init()
Пример #25
0
    def setup_class(cls):
        """Fit three OLS regressions of investment growth on numpy arrays.

        Growth rates are 400 times the first difference of the logged
        series; ``realint`` and ``tbilrate`` drop their last value so the
        lengths match. Results go to ``cls.res``, ``cls.res2`` and
        ``cls.res3``, and the first model's arrays to ``cls.endog`` /
        ``cls.exog``.
        """
        macro = macrodata.load_pandas().data
        # growth rates
        inv_growth = 400 * np.diff(np.log(macro['realinv'].values))
        gdp_growth = 400 * np.diff(np.log(macro['realgdp'].values))
        real_rate = macro['realint'][:-1].values
        tbill_rate = macro['tbilrate'][:-1].values

        # one design per model: GDP growth plus an optional rate column
        designs = [
            add_constant(np.c_[gdp_growth, real_rate]),
            add_constant(np.c_[gdp_growth, tbill_rate]),
            add_constant(np.c_[gdp_growth]),
        ]
        cls.res, cls.res2, cls.res3 = [OLS(inv_growth, design).fit()
                                       for design in designs]

        # expose the arrays of the first model
        cls.endog = cls.res.model.endog
        cls.exog = cls.res.model.exog
Пример #26
0
def test_outlier_influence_funcs(reset_randomstate):
    """Smoke-test ``summary_table`` and check that ``alpha`` changes it.

    Columns 6 and 7 of the second returned element are compared between
    ``alpha=0.01`` and the default call. The functions are then run once
    more on a fit with a single regressor.
    """
    design = add_constant(np.random.randn(10, 2))
    response = design.sum(1) + np.random.randn(10)
    full_fit = OLS(response, design).fit()

    table_05 = oi.summary_table(full_fit)
    # GH#3344 : Check alpha has an effect
    table_01 = oi.summary_table(full_fit, alpha=0.01)
    data_05, data_01 = table_05[1], table_01[1]
    assert np.all(data_01[:, 6] <= data_05[:, 6])
    assert np.all(data_01[:, 7] >= data_05[:, 7])

    # only the first design column as regressor; just has to run
    single_fit = OLS(response, design[:, 0]).fit()
    oi.summary_table(single_fit, alpha=0.05)
    influence = single_fit.get_influence()
    influence.summary_table()
Пример #27
0
    def setup_class(cls):
        """Collect four Longley fits that use the same random weights.

        Weights are drawn uniformly from [0.5, 1) with seed 5.
        ``cls.results`` holds, in order: WLS with the weights, WLS with
        0.01 times the weights, GLS with 100 times the inverse weights,
        and GLS with a diagonal matrix of 0.1 times the inverse weights.
        """
        longley_data = datasets.longley.load(as_pandas=False)
        longley_data.exog = add_constant(longley_data.exog, prepend=False)
        endog, exog = longley_data.endog, longley_data.exog

        np.random.seed(5)
        weights = np.random.uniform(0.5, 1, endog.shape[0])
        inv_weights = 1. / weights

        cls.results = [
            WLS(endog, exog, weights).fit(),
            WLS(endog, exog, 0.01 * weights).fit(),
            GLS(endog, exog, 100 * inv_weights).fit(),
            GLS(endog, exog, np.diag(0.1 * inv_weights)).fit(),
        ]
Пример #28
0
 def setup_class(cls):
     """Fit GLS on two Longley regressors with a Toeplitz-power sigma.

     A preliminary OLS fit gives the correlation between consecutive
     residuals. ``sigma`` is that correlation raised elementwise to the
     powers in a 16x16 Toeplitz matrix of 0..15. The GLS result is stored
     on ``cls.res1``.
     """
     longley_data = datasets.longley.load(as_pandas=False)
     two_columns = np.column_stack((longley_data.exog[:, 1],
                                    longley_data.exog[:, 4]))
     exog = add_constant(two_columns, prepend=False)

     # residual correlation at lag one from a plain OLS fit
     ols_resid = OLS(longley_data.endog, exog).fit().resid
     rho = np.corrcoef(ols_resid[1:], ols_resid[:-1])[0][1]  # by assumption
     lags = toeplitz(np.arange(16))
     sigma = rho**lags

     cls.res1 = GLS(longley_data.endog, exog, sigma=sigma).fit()
     # attach for test_missing
     cls.sigma = sigma
     cls.exog = exog
     cls.endog = longley_data.endog
Пример #29
0
    def setup_class(cls):
        """Fit OLS on the first 200 Grunfeld rows and build panel indices.

        Stores the fit on ``cls.res1``, integer firm codes on
        ``cls.groups``, a zero-based year index on ``cls.time`` and ten
        ``(start, stop)`` row intervals of length 20 on ``cls.tidx``.
        """
        panel = grunfeld.data.load_pandas()
        # Stata example/data seems to miss last firm
        endog = panel.endog[:200]
        exog_frame = panel.exog[:200]
        design = add_constant(exog_frame[['value', 'capital']],
                              prepend=False)
        cls.res1 = OLS(endog, design).fit()

        # integer code per firm; the unique names themselves are not used
        firm_labels = np.asarray(exog_frame[['firm']], 'S20')
        _, cls.groups = np.unique(firm_labels, return_inverse=True)

        # time indicator in range(max Ti)
        years = np.asarray(exog_frame[['year']])
        years -= years.min()
        cls.time = np.squeeze(years).astype(int)

        # nw_panel function requires interval bounds
        cls.tidx = [(start, start + 20) for start in range(0, 200, 20)]
Пример #30
0
    def setup_class(cls):
        """Fit the Longley OLS model by default, by QR, and by manual QR.

        ``cls.res1`` is the default fit and ``cls.res_qr`` the
        ``method="qr"`` fit. ``cls.res_qr_manual`` is a ``method="qr"``
        fit on a model whose QR attributes, normalized covariance and
        rank were assigned by hand beforehand.
        """
        longley_data = datasets.longley.load(as_pandas=False)
        longley_data.exog = add_constant(longley_data.exog, prepend=False)
        endog, exog = longley_data.endog, longley_data.exog

        cls.res1 = OLS(endog, exog).fit()
        #cls.res2.wresid = cls.res1.wresid  # workaround hack

        cls.res_qr = OLS(endog, exog).fit(method="qr")

        # set the decomposition and derived quantities on the model by hand
        manual_model = OLS(endog, exog)
        q_factor, r_factor = np.linalg.qr(exog)
        manual_model.exog_Q = q_factor
        manual_model.exog_R = r_factor
        manual_model.normalized_cov_params = np.linalg.inv(
            np.dot(r_factor.T, r_factor))
        manual_model.rank = np.linalg.matrix_rank(r_factor)
        cls.res_qr_manual = manual_model.fit(method="qr")