def test_regularized_weights(self):
    """Regularized OLS, WLS and GLS fits must agree on equivalent data.

    The OLS design repeats the first sample twice; the WLS design keeps
    it once with weight 2, and the GLS design uses sigma = diag(1 / weights).
    """
    np.random.seed(1432)

    def draw_sample():
        # Draw order matters for reproducibility: regressors first,
        # then the noise term.
        regressors = np.random.normal(size=(100, 3))
        noise = np.random.normal(size=100)
        return regressors, regressors[:, 0] + regressors[:, 1] + noise

    exog1, endog1 = draw_sample()
    exog2, endog2 = draw_sample()

    # Stacked design: the first sample appears twice.
    exog_a = np.vstack((exog1, exog1, exog2))
    endog_a = np.concatenate((endog1, endog1, endog2))

    # Weighted design that should be equivalent to exog_a, endog_a.
    exog_b = np.vstack((exog1, exog2))
    endog_b = np.concatenate((endog1, endog2))
    wgts = np.repeat([2.0, 1.0], 100)
    sigma = np.diag(1 / wgts)

    # TODO: parametrize?
    grid = [(l1_wt, alpha) for l1_wt in [0, 0.5, 1] for alpha in [0, 1]]
    for l1_wt, alpha in grid:
        fit_kwds = {'L1_wt': l1_wt, 'alpha': alpha}

        rslt_ols = OLS(endog_a, exog_a).fit_regularized(**fit_kwds)
        rslt_wls = WLS(endog_b, exog_b,
                       weights=wgts).fit_regularized(**fit_kwds)
        rslt_gls = GLS(endog_b, exog_b,
                       sigma=sigma).fit_regularized(**fit_kwds)

        assert_almost_equal(rslt_ols.params, rslt_wls.params, decimal=3)
        assert_almost_equal(rslt_ols.params, rslt_gls.params, decimal=3)
def test_fixed_scale(self):
    """Check 'fixed_scale' covariance results for the OLS and WLS fixtures.

    Compares ``self.res_ols`` and ``self.res_wls`` after switching both to
    cov_type 'fixed_scale', then verifies that a WLS result with this
    cov_type is consistent with a t_test based on ``normalized_cov_params``,
    both via ``get_robustcov_results`` and via ``fit(cov_type=...)``.
    """
    cov_type = 'fixed_scale'
    kwds = {}

    def check_against_t_test(res):
        # With a fixed scale, cov_params is expected to equal the
        # normalized covariance, so a t_test using it must reproduce
        # the standard errors, p-values and t-values of the result.
        restriction = np.eye(len(res.params))
        tt = res.t_test(restriction, cov_p=res.normalized_cov_params)
        assert_allclose(res.cov_params(), res.normalized_cov_params,
                        rtol=1e-13)
        assert_allclose(res.bse, tt.sd, rtol=1e-13)
        assert_allclose(res.pvalues, tt.pvalue, rtol=1e-13)
        assert_allclose(res.tvalues, tt.tvalue, rtol=1e-13)

    from_ols = self.res_ols.get_robustcov_results(cov_type, **kwds)
    from_wls = self.res_wls.get_robustcov_results(cov_type, **kwds)

    assert_allclose(from_ols.params, from_wls.params, rtol=1e-13)
    assert_allclose(from_ols.cov_params(), from_wls.cov_params(),
                    rtol=1e-13)
    assert_allclose(from_ols.bse, from_wls.bse, rtol=1e-13)
    assert_allclose(from_ols.pvalues, from_wls.pvalues, rtol=1e-12)

    check_against_t_test(from_wls)

    # using cov_type in fit
    base_model = self.res_wls.model
    refit_model = WLS(base_model.endog, base_model.exog,
                      weights=base_model.weights)
    refit = refit_model.fit(cov_type=cov_type, cov_kwds=kwds)
    check_against_t_test(refit)
def test_cov_type_fixed_scale():
    """Check WLS standard errors with and without a fixed scale.

    This is a unit test from scipy curvefit for the `absolute_sigma`
    keyword.  The default fit is compared with fits using
    ``cov_type='fixed scale'``, including rescaled weights and an explicit
    ``scale`` passed through ``cov_kwds``.
    """
    xdata = np.array([0, 1, 2, 3, 4, 5])
    ydata = np.array([1, 1, 5, 7, 8, 12])
    sigma = np.array([1, 2, 1, 2, 1, 2])

    # Design matrix: slope column followed by a constant column.
    xdata = np.column_stack((xdata, np.ones(len(xdata))))
    weights = 1. / sigma**2

    # Default covariance (scale estimated from the residuals).
    res = WLS(ydata, xdata, weights=weights).fit()
    assert_allclose(res.bse, [0.20659803, 0.57204404], rtol=1e-3)

    # Fixed scale: standard errors come directly from the weights.
    res = WLS(ydata, xdata, weights=weights).fit(cov_type='fixed scale')
    assert_allclose(res.bse, [0.30714756, 0.85045308], rtol=1e-3)

    # Dividing the weights by 9 must triple the standard errors ...
    res = WLS(ydata, xdata, weights=weights / 9.).fit(cov_type='fixed scale')
    assert_allclose(res.bse, [3 * 0.30714756, 3 * 0.85045308], rtol=1e-3)

    # ... and so must passing scale=9 explicitly with the original weights.
    res = WLS(ydata, xdata, weights=weights).fit(cov_type='fixed scale',
                                                 cov_kwds={'scale': 9})
    assert_allclose(res.bse, [3 * 0.30714756, 3 * 0.85045308], rtol=1e-3)
def setup_class(cls):
    """Fit WLS on the longley data with scalar and with array weights.

    ``cls.res1`` uses the scalar weight 1/3; ``cls.res2`` uses a list that
    repeats the same weight once per observation.
    """
    dta = datasets.longley.load(as_pandas=False)
    dta.exog = add_constant(dta.exog, prepend=True)

    # Scalar weight.
    cls.res1 = WLS(dta.endog, dta.exog, weights=1. / 3).fit()

    # The same weight spelled out per observation.
    nobs = len(dta.endog)
    per_obs_weights = [1 / 3.] * nobs
    cls.res2 = WLS(dta.endog, dta.exog, weights=per_obs_weights).fit()
def setup_class(cls):
    """Fit WLS and GLS on the longley data with rescaled random weights.

    ``cls.results`` holds four fits: WLS with weights ``w`` and ``0.01 * w``,
    and GLS with sigma ``100 / w`` (1-D) and ``diag(0.1 / w)`` (2-D).
    """
    data = datasets.longley.load(as_pandas=False)
    data.exog = add_constant(data.exog, prepend=False)
    endog, exog = data.endog, data.exog
    nobs = endog.shape[0]

    np.random.seed(5)
    w = np.random.uniform(0.5, 1, nobs)
    w_inv = 1. / w

    cls.results = [
        WLS(endog, exog, w).fit(),
        WLS(endog, exog, 0.01 * w).fit(),
        GLS(endog, exog, 100 * w_inv).fit(),
        GLS(endog, exog, np.diag(0.1 * w_inv)).fit(),
    ]
def test_equivalence_unweighted(self, check):
    """Unweighted WLS and _MinimalWLS must agree on the first data set.

    ``check`` is forwarded as both ``check_endog`` and ``check_weights``.
    """
    reference = WLS(self.endog1, self.exog1).fit()
    minimal_model = _MinimalWLS(self.endog1, self.exog1,
                                check_endog=check, check_weights=check)
    minimal = minimal_model.fit()

    for attr in ('params', 'resid'):
        assert_allclose(getattr(reference, attr), getattr(minimal, attr))
def test_equivalence_unweighted2(self, check):
    """Unweighted WLS and _MinimalWLS must agree on the second data set.

    ``check`` is forwarded as both ``check_endog`` and ``check_weights``.
    """
    # TODO: Better name than 1 vs 2?
    reference = WLS(self.endog2, self.exog2).fit()
    minimal_model = _MinimalWLS(self.endog2, self.exog2,
                                check_endog=check, check_weights=check)
    minimal = minimal_model.fit()

    for attr in ('params', 'resid'):
        assert_allclose(getattr(reference, attr), getattr(minimal, attr))
def test_wls_missing():
    """WLS with missing='drop' must shrink endog, exog and weights alike."""
    data = datasets.ccard.load(as_pandas=False)

    # Knock out two responses in place so that two rows have to be dropped.
    data.endog[[10, 25]] = np.nan

    inverse_col2 = 1 / data.exog[:, 2]
    mod = WLS(data.endog, data.exog, weights=inverse_col2, missing='drop')

    expected_rows = 70
    assert mod.endog.shape[0] == expected_rows
    assert mod.exog.shape[0] == expected_rows
    assert mod.weights.shape[0] == expected_rows
def test_wls_example():
    """Reproduce the WLS docstring example and compare with R values."""
    # example from the docstring, there was a note about a bug, should
    # be fixed now
    response = [1, 3, 4, 5, 2, 3, 4]
    design = add_constant(list(range(1, 8)), prepend=False)
    fitted = WLS(response, design, weights=list(range(1, 8))).fit()

    # taken from R lm.summary
    assert_almost_equal(fitted.fvalue, 0.127337843215, 6)
    assert_almost_equal(fitted.scale, 2.44608530786**2, 6)
def setup_class(cls):
    """Simulate weighted data and fit the reduced WLS reference model.

    Reads ``cls.idx_uc`` and ``cls.fit_kwargs`` and finishes by calling
    ``cls.init()``; none of these are defined here, so they must be
    provided elsewhere on the class.
    """
    nobs, k_exog = 100, 5
    np.random.seed(987125)

    # Random draws happen in a fixed order: regressors, weights, noise.
    design = add_constant(np.random.randn(nobs, k_exog - 1))
    cls.aweights = np.random.randint(1, 10, nobs)
    response = design.sum(1) / 2 + 2 * np.random.randn(nobs)

    cls.endog = response
    cls.exog = design
    cls.idx_p_uc = np.array(cls.idx_uc)

    # Reduced design containing only the columns listed in idx_uc.
    reduced = design[:, cls.idx_uc]
    cls.exogc = reduced

    # Column 1 is moved to the left-hand side with coefficient 0.5.
    reduced_model = WLS(response - 0.5 * design[:, 1], reduced,
                        weights=cls.aweights)
    reduced_model.exog_names[:] = ['const', 'x2', 'x3', 'x4']
    cls.mod2 = reduced_model
    cls.res2 = cls.mod2.fit(**cls.fit_kwargs)

    cls.init()
def setup_class(cls):
    """Fit WLS on simulated heteroscedastic data (from example wls.py).

    The last 40% of the observations get a noise multiplier of 3; the
    model is fitted with weights ``1 / w`` and stored as ``cls.res_wls``.
    """
    nsample = 50
    grid = np.linspace(0, 20, nsample)
    full_design = add_constant(np.column_stack((grid, (grid - 5)**2)))
    beta = [5., 0.5, -0.01]
    sig = 0.5

    w = np.ones(nsample)
    w[int(nsample * 6. / 10):] = 3

    y_true = np.dot(full_design, beta)
    # NOTE(review): no seed is set in this method, so the noise depends on
    # the global NumPy random state at call time.
    noise = np.random.normal(size=nsample)
    y = y_true + sig * w * noise

    # Only the first two columns are used for estimation.
    design = full_design[:, [0, 1]]

    # WLS knowing the true variance ratio of heteroscedasticity
    cls.res_wls = WLS(y, design, weights=1. / w).fit()
def setup_class(cls):
    """Fit OLS, WLS and GLS on the longley data with constant weights.

    ``cls.results`` holds, in order: OLS, WLS with unit weights, GLS with
    sigma ``100 * ones`` (1-D) and GLS with sigma ``diag(0.1 * ones)``.
    """
    data = datasets.longley.load(as_pandas=False)
    data.exog = add_constant(data.exog, prepend=False)
    endog, exog = data.endog, data.exog
    unit = np.ones(endog.shape[0])

    cls.results = [
        OLS(endog, exog).fit(),
        WLS(endog, exog, unit).fit(),
        GLS(endog, exog, 100 * unit).fit(),
        GLS(endog, exog, np.diag(0.1 * unit)).fit(),
    ]
def test_wls_tss():
    """centered_tss of a weighted fit must match the expanded OLS fit.

    The OLS data repeat rows; the WLS data list each distinct row once with
    the repetition count as weight.
    """
    # Expanded data: every observation listed explicitly.
    y_full = np.array([22, 22, 22, 23, 23, 23])
    x_full = [[1, 0], [1, 0], [1, 1], [0, 1], [0, 1], [0, 1]]
    ols_res = OLS(y_full, add_constant(x_full, prepend=False)).fit()

    # Collapsed data: distinct rows plus their multiplicities as weights.
    y_unique = np.array([22, 22, 23.])
    x_unique = [[1, 0], [1, 1], [0, 1]]
    counts = np.array([2, 1, 3.])
    wls_res = WLS(y_unique, add_constant(x_unique, prepend=False),
                  weights=counts).fit()

    assert_equal(ols_res.centered_tss, wls_res.centered_tss)
def test_fvalue_only_constant():
    """fvalue and f_pvalue must be nan when the model has only a constant.

    See GH#3642.  Checked for OLS with HAC covariance and WLS with HC1.
    """
    nobs = 20
    np.random.seed(2)
    const_only = np.ones(nobs)
    y = np.random.randn(nobs)

    cases = [
        (OLS, {'cov_type': 'hac', 'cov_kwds': {'maxlags': 3}}),
        (WLS, {'cov_type': 'HC1'}),
    ]
    for model_cls, fit_kwds in cases:
        res = model_cls(y, const_only).fit(**fit_kwds)
        assert np.isnan(res.fvalue)
        assert np.isnan(res.f_pvalue)
        # 2018-03-05 smoke-test res.summary() from upstream is disabled
def test_fvalue_implicit_constant():
    """fvalue and f_pvalue must be nan when the constant is implicit.

    See GH#2444.  The two dummy columns are complementary, so together
    they span a constant without an explicit constant column.
    """
    nobs = 100
    np.random.seed(2)
    draws = np.random.randn(nobs, 1)
    # Broadcasting against [True, False] yields two complementary
    # indicator columns: "draw > 0" and "draw <= 0".
    dummies = ((draws > 0) == [True, False]).astype(int)
    y = dummies.sum(1) + np.random.randn(nobs)

    for model_cls in (OLS, WLS):
        res = model_cls(y, dummies).fit(cov_type='HC1')
        assert np.isnan(res.fvalue)
        assert np.isnan(res.f_pvalue)
        # 2018-03-05 smoke-test res.summary() from upstream is disabled
def setup_class(cls):
    """Fit WLS on the ccard data and align the reference results.

    ``cls.res2`` is not created here; it must already exist on the class
    (presumably the reference results - confirm).  Its ``aic``, ``bic``
    and ``llf`` are adjusted in place.
    """
    dta = datasets.ccard.load(as_pandas=False)
    dta.exog = add_constant(dta.exog, prepend=False)

    nobs = 72.
    raw_weights = 1 / dta.exog[:, 2]
    # for comparison with stata analytic weights
    scaled_weights = (raw_weights * nobs) / raw_weights.sum()

    cls.res1 = WLS(dta.endog, dta.exog, weights=scaled_weights).fit()
    # Disabled upstream:
    # cls.res2.wresid = scaled_weights ** .5 * cls.res2.resid

    # correction because we use different definition for loglike/llf
    # The correction must be computed before res2.llf itself is shifted.
    reference = cls.res2
    corr_ic = 2 * (cls.res1.llf - reference.llf)
    reference.aic -= corr_ic
    reference.bic -= corr_ic
    reference.llf += 0.5 * np.sum(np.log(cls.res1.model.weights))
def setup_class(cls):
    """Fit WLS on the first 200 grunfeld rows and build panel indices.

    Sets ``cls.res1`` (WLS with weights ``1 / value``), ``cls.groups``
    (integer firm codes), ``cls.time`` (year offsets starting at zero) and
    ``cls.tidx`` (ten consecutive 20-row interval bounds).
    """
    panel = grunfeld.data.load_pandas()
    # Stata example/data seems to miss last firm
    # TODO: Is the comment above (from upstream) actionable?
    endog_200 = panel.endog[:200]
    exog_200 = panel.exog[:200]

    design = add_constant(exog_200[['value', 'capital']], prepend=False)
    # asserts don't work for pandas
    model = WLS(endog_200, design, weights=1 / exog_200['value'])
    cls.res1 = model.fit()

    # Integer code per firm; the unique names themselves are not needed.
    firm_labels = np.asarray(exog_200[['firm']], 'S20')
    _, firm_id = np.unique(firm_labels, return_inverse=True)
    cls.groups = firm_id

    # time indicator in range(max Ti)
    years = np.asarray(exog_200[['year']])
    years -= years.min()
    cls.time = np.squeeze(years).astype(int)

    # nw_panel function requires interval bounds
    cls.tidx = [(start, start + 20) for start in range(0, 200, 20)]
def test_finite_weight_sigma(bad_value, use_pandas):
    """WLS and GLS must reject weights / sigma containing ``bad_value``.

    See GH#4969.  Both constructors are expected to raise MissingDataError
    with a message that names the offending argument.
    """
    endog = np.random.randn(100)
    exog = np.random.randn(100, 2)

    # weights and sigma deliberately refer to the same array, so the bad
    # entries written below show up in both.
    shared = np.ones(100)
    shared[-2:] = bad_value
    if use_pandas:
        shared = pd.Series(shared)
    weights = sigma = shared

    with pytest.raises(MissingDataError) as err:
        WLS(endog, exog, weights=weights)
    assert err.type is MissingDataError
    assert 'weights' in err.value.args[0]

    with pytest.raises(MissingDataError) as err:
        GLS(endog, exog, sigma=sigma)
    assert err.type is MissingDataError
    assert 'sigma' in err.value.args[0]
def test_predict_se():
    """Check wls_prediction_std for OLS and WLS against direct calculation.

    This test doesn't use reference values: it checks consistency across
    options (full sample, subsets of exog, scalar and list weights) and
    compares with the prediction variance computed by hand.
    """
    # stats.t.isf(0.05/2., 50 - 2)
    t_crit = 2.0106347546964458

    def check_interval(lower, upper, fitted, half_width):
        # The interval bounds must equal fitted values -/+ the half width.
        assert_allclose(upper, fitted + half_width, rtol=1e-12)
        assert_allclose(lower, fitted - half_width, rtol=1e-12)

    # generate dataset
    nsample = 50
    x1 = np.linspace(0, 20, nsample)
    x = np.column_stack((x1, (x1 - 5)**2, np.ones(nsample)))
    np.random.seed(0)
    # TODO: Upstream had commented-out seeds 9876789, 9876543;
    # figure out why 0 is used instead of those
    beta = [0.5, -0.01, 5.]
    y_true2 = np.dot(x, beta)
    w = np.ones(nsample)
    w[int(nsample * 6. / 10):] = 3
    sig = 0.5
    y2 = y_true2 + sig * w * np.random.normal(size=nsample)
    x2 = x[:, [0, 2]]

    # ---- OLS ----
    res2 = OLS(y2, x2).fit()

    # direct calculation
    covb = res2.cov_params()
    predvar = res2.mse_resid + (x2 * np.dot(covb, x2.T).T).sum(1)
    predstd = np.sqrt(predvar)

    prstd, iv_l, iv_u = wls_prediction_std(res2)
    np.testing.assert_almost_equal(prstd, predstd, 15)

    ci_half = t_crit * predstd
    check_interval(iv_l, iv_u, res2.fittedvalues, ci_half)

    prstd, iv_l, iv_u = wls_prediction_std(res2, x2[:3, :])
    # NOTE(review): prstd was just rebound, so this compares the array
    # with a slice of itself; the interval checks below are what tie the
    # subset result to the full-sample values.
    assert_equal(prstd, prstd[:3])
    check_interval(iv_l, iv_u, res2.fittedvalues[:3], ci_half[:3])

    # ---- WLS ----
    res3 = WLS(y2, x2, 1. / w).fit()

    # direct calculation
    covb = res3.cov_params()
    predvar = res3.mse_resid * w + (x2 * np.dot(covb, x2.T).T).sum(1)
    predstd = np.sqrt(predvar)

    prstd, iv_l, iv_u = wls_prediction_std(res3)
    np.testing.assert_almost_equal(prstd, predstd, 15)

    ci_half = t_crit * predstd
    check_interval(iv_l, iv_u, res3.fittedvalues, ci_half)

    # testing shapes of exog
    # NOTE(review): as above, each assert_equal compares the fresh result
    # with an index/slice of itself, so these mainly exercise that the
    # call succeeds for the given exog / weights shapes.
    shape_cases = [
        (x2[-1:, :], 3., -1),
        (x2[-1, :], 3., -1),
        (x2[-2:, :], 3., slice(-2, None)),
        (x2[-2:, :], [3, 3], slice(-2, None)),
    ]
    for exog_new, weights_new, selector in shape_cases:
        prstd, iv_l, iv_u = wls_prediction_std(res3, exog_new,
                                               weights=weights_new)
        assert_equal(prstd, prstd[selector])

    prstd, iv_l, iv_u = wls_prediction_std(res3, x2[:3, :])
    assert_equal(prstd, prstd[:3])
    check_interval(iv_l, iv_u, res3.fittedvalues[:3], ci_half[:3])

    # use wrong size for exog
    with pytest.raises(ValueError):
        wls_prediction_std(res3, x2[-1, 0], weights=3.)

    # check some weight values
    last_rows = x2[-3:, :]
    sew1 = wls_prediction_std(res3, last_rows)[0]**2
    for wv in np.linspace(0.5, 3, 5):
        sew = wls_prediction_std(res3, last_rows, weights=1. / wv)[0]**2
        assert_allclose(sew, sew1 + res3.scale * (wv - 1))
def setup_class(cls):
    """Fit WLS on a single observation with the scalar weight 1.

    Stores the data as ``cls.exog`` / ``cls.endog`` and the fit as
    ``cls.wls_res``.
    """
    single_obs_shape = (1,)
    cls.exog = np.ones(single_obs_shape)
    cls.endog = np.ones(single_obs_shape)
    model = WLS(cls.endog, cls.exog, weights=1)
    cls.wls_res = model.fit()
def test_wrong_size_weights(self):
    """WLS must raise ValueError for a 10x10 weights array."""
    bad_weights = np.ones((10, 10))
    with pytest.raises(ValueError):
        WLS(self.endog, self.exog, weights=bad_weights)
def setup_class(cls):
    """Fit OLS and WLS without explicit weights on the longley data.

    ``cls.res1`` is the OLS fit and ``cls.res2`` the WLS fit of the same
    endog / exog (constant appended as the last column).
    """
    data = datasets.longley.load(as_pandas=False)
    data.exog = add_constant(data.exog, prepend=False)
    endog, exog = data.endog, data.exog

    cls.res1 = OLS(endog, exog).fit()
    cls.res2 = WLS(endog, exog).fit()