def test_predict_missing(self):
    """Check `predict` output when one of the input rows contains a NaN."""
    exog = self.data[:5].copy()
    # knock out a single value in the first row
    exog.iloc[0, 1] = np.nan

    with_missing = self.res.predict(exog)
    without_first = self.res.predict(exog[1:])

    # the output index still covers every input row
    assert_index_equal(with_missing.index, exog.index)
    # the remaining rows agree with a prediction made without the first row
    assert_series_equal(with_missing[1:], without_first)
    # and the prediction for the incomplete row is NaN
    assert_equal(with_missing.values[0], np.nan)
def test_unobserved_components_time_varying(revisions, updates):
    """
    Check that `news` works with a time-varying UnobservedComponents model.

    A model with an exog term whose coefficient is set to zero (so the state
    space matrices are time-varying) is compared against the time-invariant
    model without the exog term.
    """
    # Construct previous and updated datasets; with `updates` the previous
    # dataset stops one quarter earlier than the updated one
    infl = dta['infl'].copy()
    prev_end = '2009Q2' if updates else '2009Q3'
    endog1 = infl.loc[:prev_end].copy()
    endog2 = infl.loc[:'2009Q3'].copy()

    # Without updates and without NaN values, we need to specify that the
    # type of the comparison object that we're passing is "updated"
    comparison_type = None if updates else 'updated'

    if revisions:
        endog1.iloc[-1] = 0.

    exog1 = np.ones_like(endog1)
    exog2 = np.ones_like(endog2)

    news_kwargs = dict(start='2008Q1', end='2009Q3',
                       comparison_type=comparison_type)

    # News from the model with an exog term (time-varying), with the
    # coefficient set to zero so that it is equivalent to the
    # time-invariant model
    mod1 = structural.UnobservedComponents(endog1, 'llevel', exog=exog1)
    res1 = mod1.smooth([0.5, 0.2, 0.0])
    news1 = res1.news(endog2, exog=exog2, **news_kwargs)

    # News from the model without the exog term
    mod2 = structural.UnobservedComponents(endog1, 'llevel')
    res2 = mod2.smooth([0.5, 0.2])
    news2 = res2.news(endog2, **news_kwargs)

    attrs = [
        'total_impacts', 'update_impacts', 'revision_impacts', 'news',
        'weights', 'update_forecasts', 'update_realized',
        'prev_impacted_forecasts', 'post_impacted_forecasts',
        'revisions_iloc', 'revisions_ix', 'updates_iloc', 'updates_ix',
    ]
    for name in attrs:
        actual = getattr(news1, name)
        desired = getattr(news2, name)
        # pick the comparison helper from the type of the reference value
        if isinstance(desired, pd.Series):
            compare = assert_series_equal
        else:
            compare = assert_frame_equal
        compare(actual, desired)
def test_noop(self):
    """Check that `handle_missing` with missing='none' leaves the data as is."""
    df = make_dataframe()
    # Insert NaNs point-wise at (row, column) positions.  Assigning through
    # `.iloc` instead of mutating `df.values` in place avoids relying on
    # `.values` being a writable view of the frame, which is not guaranteed
    # (it is read-only under pandas Copy-on-Write).
    for row, col in zip([2, 5, 10], [2, 3, 1]):
        df.iloc[row, col] = np.nan
    y, X = df[df.columns[0]], df[df.columns[1:]]
    data, _ = sm_data.handle_missing(y, X, missing='none')

    # nothing is dropped, so the expected values are the inputs themselves
    y_exp, X_exp = df[df.columns[0]], df[df.columns[1:]]
    assert_frame_equal(data['exog'], X_exp)
    assert_series_equal(data['endog'], y_exp)
def test_predict_missing(self):
    """Check `predict` output when one of the input rows contains a NaN."""
    exog = self.data[:5].copy()
    # knock out a single value in the first row
    exog.iloc[0, 1] = np.nan

    pred_all = self.res.predict(exog)
    pred_tail = self.res.predict(exog[1:])

    # the output index still covers every input row
    assert_index_equal(pred_all.index, exog.index)
    # the remaining rows agree with a prediction made without the first row
    assert_series_equal(pred_all[1:], pred_tail)
    # and the prediction for the incomplete row is NaN
    assert_equal(pred_all.values[0], np.nan)
def check_predict_types(results):
    """
    Check that the `predict` method of the given results object produces the
    correct output type.

    Parameters
    ----------
    results : Results

    Raises
    ------
    AssertionError
    """
    # ignore wrapper for isinstance check
    from statsmodels.genmod.generalized_linear_model import GLMResults
    from statsmodels.discrete.discrete_model import DiscreteResults
    from statsmodels.compat.pandas import assert_frame_equal, assert_series_equal

    res = results
    # squeeze to make 1d for single regressor test case
    p_exog = np.squeeze(np.asarray(res.model.exog[:2]))

    # possibly unwrap -- GEE has no wrapper
    results = getattr(results, '_results', results)

    if isinstance(results, (GLMResults, DiscreteResults)):
        # SMOKE test only  TODO: mark this somehow
        res.predict(p_exog)
        res.predict(p_exog.tolist())
        res.predict(p_exog[0].tolist())
        return

    # NOTE(review): the original indentation was lost; everything below is
    # assumed to belong to the non-smoke branch -- confirm against upstream.
    tol = dict(rtol=1e-12)
    fitted = res.fittedvalues[:2]
    assert_allclose(fitted, res.predict(p_exog), **tol)
    # this needs reshape to column-vector:
    assert_allclose(fitted, res.predict(np.squeeze(p_exog).tolist()), **tol)
    # only one prediction:
    assert_allclose(fitted[:1], res.predict(p_exog[0].tolist()), **tol)
    assert_allclose(fitted[:1], res.predict(p_exog[0]), **tol)

    # Check that pandas wrapping works as expected
    exog_index = range(len(p_exog))
    predicted = res.predict(p_exog)
    if p_exog.ndim == 1:
        pandas_exog = pd.Series(p_exog, index=exog_index)
    else:
        pandas_exog = pd.DataFrame(p_exog, index=exog_index)
    predicted_pandas = res.predict(pandas_exog)

    # predicted.ndim may not match p_exog.ndim because it may be squeezed
    # if p_exog has only one column
    if predicted.ndim == 1:
        predicted_expected = pd.Series(predicted, index=exog_index)
    else:
        predicted_expected = pd.DataFrame(predicted, index=exog_index)

    if isinstance(predicted_expected, pd.Series):
        assert_series_equal(predicted_expected, predicted_pandas)
    else:
        assert_frame_equal(predicted_expected, predicted_pandas)
def test_detrend_series(self):
    """Check `detrend` on a named Series for order 1 and order 0."""
    series = pd.Series(self.data_1d, name='one')

    # order=1: the result is expected to be all zeros
    result = sm.tsa.detrend(series, order=1)
    assert_array_almost_equal(result.values, np.zeros_like(series))
    # the output is a Series that keeps the input name
    rebuilt = pd.Series(result.values, name='one')
    assert_series_equal(result, rebuilt)

    # order=0: compare against the fixed expected values
    result = sm.tsa.detrend(series, order=0)
    assert_array_almost_equal(result.values, pd.Series([-2, -1, 0, 1, 2]))
    rebuilt = pd.Series(result.values, name='one')
    assert_series_equal(result, rebuilt)
def test_dynamic_factor_time_varying(revisions, updates):
    """
    Check that `news` works with a time-varying DynamicFactor model.

    A model with an exog term whose coefficients are set to zero (so the
    state space matrices are time-varying) is compared against the
    time-invariant model without the exog term.
    """
    # Construct previous and updated datasets
    full = dta[['realgdp', 'unemp']].copy()
    full['realgdp'] = np.log(full['realgdp']).diff() * 400
    full = full.iloc[1:]

    # with `updates` the previous dataset stops one quarter earlier
    prev_end = '2009Q2' if updates else '2009Q3'
    endog1 = full.loc[:prev_end].copy()
    endog2 = full.loc[:'2009Q3'].copy()

    # Without updates and without NaN values, we need to specify that the
    # type of the comparison object that we're passing is "updated"
    comparison_type = None if updates else 'updated'

    if revisions:
        # TODO: add test for only one of the variables revising?
        endog1.iloc[-1] = 0.

    exog1 = np.ones_like(endog1['realgdp'])
    exog2 = np.ones_like(endog2['realgdp'])
    params1 = np.r_[0.9, 0.2, 0.0, 0.0, 1.2, 1.1, 0.5, 0.2]
    params2 = np.r_[0.9, 0.2, 1.2, 1.1, 0.5, 0.2]

    news_kwargs = dict(start='2008Q1', end='2009Q3',
                       comparison_type=comparison_type)

    # News from the model with an exog term (time-varying), with the
    # coefficients set to zero so that it is equivalent to the
    # time-invariant model
    mod1 = dynamic_factor.DynamicFactor(endog1, exog=exog1, k_factors=1,
                                        factor_order=2)
    res1 = mod1.smooth(params1)
    news1 = res1.news(endog2, exog=exog2, **news_kwargs)

    # News from the model without the exog term
    mod2 = dynamic_factor.DynamicFactor(endog1, k_factors=1, factor_order=2)
    res2 = mod2.smooth(params2)
    news2 = res2.news(endog2, **news_kwargs)

    attrs = [
        'total_impacts', 'update_impacts', 'revision_impacts', 'news',
        'weights', 'update_forecasts', 'update_realized',
        'prev_impacted_forecasts', 'post_impacted_forecasts',
        'revisions_iloc', 'revisions_ix', 'updates_iloc', 'updates_ix',
    ]
    for name in attrs:
        actual = getattr(news1, name)
        desired = getattr(news2, name)
        # pick the comparison helper from the type of the reference value
        if isinstance(desired, pd.Series):
            compare = assert_series_equal
        else:
            compare = assert_frame_equal
        compare(actual, desired)
def test_pandas_array(self):
    """Check missing='drop' with a pandas endog and a NumPy array exog."""
    df = make_dataframe()
    # Insert NaNs point-wise at (row, column) positions.  Assigning through
    # `.iloc` instead of mutating `df.values` in place avoids relying on
    # `.values` being a writable view of the frame, which is not guaranteed
    # (it is read-only under pandas Copy-on-Write).
    for row, col in zip([2, 5, 10], [2, 3, 1]):
        df.iloc[row, col] = np.nan
    y, X = df[df.columns[0]], df[df.columns[1:]].values
    data, _ = sm_data.handle_missing(y, X, missing='drop')

    # expected values come from dropping every row that contains a NaN
    df = df.dropna()
    y_exp, X_exp = df[df.columns[0]], df[df.columns[1:]].values
    np.testing.assert_array_equal(data['exog'], X_exp)
    assert_series_equal(data['endog'], y_exp)
def test__ros_group_rank():
    """Compare `_ros_group_rank` against hand-written ranks."""
    columns = {
        'dl_idx': [1] * 12,
        'params': list('AABCCCDE') + list('DCBA'),
        'values': list(range(12)),
    }
    frame = pandas.DataFrame(columns)

    ranks = ros._ros_group_rank(frame, 'dl_idx', 'params')

    known_ranks = [1, 2, 1, 1, 2, 3, 1, 1, 2, 4, 2, 3]
    expected = pandas.Series(known_ranks, name='rank')
    # cast both sides so that only the values, not the integer width, matter
    assert_series_equal(ranks.astype(int), expected.astype(int))
def test_attach(self):
    """Check `wrap_output` for the 'columns', 'rows' and 'cov' attachments."""
    data = self.data
    # this makes sure what the wrappers need work but not the wrapped
    # results themselves
    wrapped = data.wrap_output(self.col_input, 'columns')
    assert_series_equal(wrapped, self.col_result)

    wrapped = data.wrap_output(self.row_input, 'rows')
    assert_series_equal(wrapped, self.row_result)

    wrapped = data.wrap_output(self.cov_input, 'cov')
    assert_frame_equal(wrapped, self.cov_result)
def test_predict_types(self):
    """
    Check that `predict` accepts arrays, lists and pandas objects and
    returns a matching pandas type for pandas input.
    """
    res = self.results
    # squeeze to make 1d for single regressor test case
    p_exog = np.squeeze(np.asarray(res.model.exog[:2]))

    # ignore wrapper for isinstance check
    from statsmodels.genmod.generalized_linear_model import GLMResults
    from statsmodels.discrete.discrete_model import DiscreteResults

    # FIXME: work around GEE has no wrapper
    results = getattr(self.results, '_results', self.results)

    if isinstance(results, (GLMResults, DiscreteResults)):
        # SMOKE test only  TODO
        res.predict(p_exog)
        res.predict(p_exog.tolist())
        res.predict(p_exog[0].tolist())
    else:
        # `pandas.util.testing` was deprecated and later removed; the public
        # location of the assertion helpers is `pandas.testing`
        from pandas.testing import assert_series_equal

        fitted = res.fittedvalues[:2]
        assert_allclose(fitted, res.predict(p_exog), rtol=1e-12)
        # this needs reshape to column-vector:
        assert_allclose(fitted, res.predict(np.squeeze(p_exog).tolist()),
                        rtol=1e-12)
        # only one prediction:
        assert_allclose(fitted[:1], res.predict(p_exog[0].tolist()),
                        rtol=1e-12)
        assert_allclose(fitted[:1], res.predict(p_exog[0]),
                        rtol=1e-12)

        # pandas input: Series for 1d exog, DataFrame otherwise
        exog_index = range(len(p_exog))
        predicted = res.predict(p_exog)

        if p_exog.ndim == 1:
            predicted_pandas = res.predict(
                pd.Series(p_exog, index=exog_index))
        else:
            predicted_pandas = res.predict(
                pd.DataFrame(p_exog, index=exog_index))

        # the expected pandas type follows the dimension of the array result
        if predicted.ndim == 1:
            assert_(isinstance(predicted_pandas, pd.Series))
            predicted_expected = pd.Series(predicted, index=exog_index)
            assert_series_equal(predicted_expected, predicted_pandas)
        else:
            assert_(isinstance(predicted_pandas, pd.DataFrame))
            predicted_expected = pd.DataFrame(predicted, index=exog_index)
            assert_(predicted_expected.equals(predicted_pandas))
def test_predict_types(self):
    """
    Check that `predict` accepts arrays, lists and pandas objects and
    returns a matching pandas type for pandas input.
    """
    res = self.results
    # squeeze to make 1d for single regressor test case
    p_exog = np.squeeze(np.asarray(res.model.exog[:2]))

    # ignore wrapper for isinstance check
    from statsmodels.genmod.generalized_linear_model import GLMResults
    from statsmodels.discrete.discrete_model import DiscreteResults

    # FIXME: work around GEE has no wrapper
    results = getattr(self.results, '_results', self.results)

    if isinstance(results, (GLMResults, DiscreteResults)):
        # SMOKE test only  TODO
        res.predict(p_exog)
        res.predict(p_exog.tolist())
        res.predict(p_exog[0].tolist())
    else:
        # `pandas.util.testing` was deprecated and later removed; the public
        # location of the assertion helpers is `pandas.testing`
        from pandas.testing import assert_series_equal

        fitted = res.fittedvalues[:2]
        assert_allclose(fitted, res.predict(p_exog), rtol=1e-12)
        # this needs reshape to column-vector:
        assert_allclose(fitted, res.predict(np.squeeze(p_exog).tolist()),
                        rtol=1e-12)
        # only one prediction:
        assert_allclose(fitted[:1], res.predict(p_exog[0].tolist()),
                        rtol=1e-12)
        assert_allclose(fitted[:1], res.predict(p_exog[0]),
                        rtol=1e-12)

        # pandas input: Series for 1d exog, DataFrame otherwise
        exog_index = range(len(p_exog))
        predicted = res.predict(p_exog)

        if p_exog.ndim == 1:
            predicted_pandas = res.predict(pd.Series(p_exog,
                                                     index=exog_index))
        else:
            predicted_pandas = res.predict(pd.DataFrame(p_exog,
                                                        index=exog_index))

        # the expected pandas type follows the dimension of the array result
        if predicted.ndim == 1:
            assert_(isinstance(predicted_pandas, pd.Series))
            predicted_expected = pd.Series(predicted, index=exog_index)
            assert_series_equal(predicted_expected, predicted_pandas)
        else:
            assert_(isinstance(predicted_pandas, pd.DataFrame))
            predicted_expected = pd.DataFrame(predicted, index=exog_index)
            assert_(predicted_expected.equals(predicted_pandas))
def test_attach(self):
    """Check `wrap_output` for each of the attachment kinds used below."""
    data = self.data

    wrapped = data.wrap_output(self.col_input, 'columns')
    assert_series_equal(wrapped, self.col_result)

    wrapped = data.wrap_output(self.row_input, 'rows')
    assert_series_equal(wrapped, self.row_result)

    wrapped = data.wrap_output(self.cov_input, 'cov')
    assert_frame_equal(wrapped, self.cov_result)

    wrapped = data.wrap_output(self.cov_eq_input, 'cov_eq')
    assert_frame_equal(wrapped, self.cov_eq_result)

    wrapped = data.wrap_output(self.col_eq_input, 'columns_eq')
    assert_frame_equal(wrapped, self.col_eq_result)
def test_categorical_series(string_var):
    """Check `tools.categorical` on a Series against `pd.get_dummies`."""
    expected = pd.get_dummies(pd.Categorical(string_var))

    # drop=True: only the dummy columns are returned
    dropped = tools.categorical(string_var, drop=True)
    assert_frame_equal(dropped, expected)

    # drop=False: the dummy columns are followed by the original variable
    kept = tools.categorical(string_var, drop=False)
    expected.columns = list(expected.columns)
    assert_frame_equal(kept.iloc[:, :5], expected)
    assert_series_equal(kept.iloc[:, 5], string_var)

    # dictnames maps the column position to the category label
    _, dictnames = tools.categorical(string_var, drop=False, dictnames=True)
    categories = pd.Categorical(string_var).categories
    for position, label in enumerate(categories):
        assert position in dictnames
        assert dictnames[position] == label
def test_drop(self):
    """Check `handle_data` with 'drop' against manually filtered rows."""
    # rows of the stacked [y, X] array that contain no NaN
    stacked = np.c_[self.y, self.X]
    keep = ~np.isnan(stacked).any(axis=1)
    y_kept = self.y.loc[keep]
    X_kept = self.X.loc[keep]

    data = sm_data.handle_data(self.y, self.X, 'drop')

    np.testing.assert_array_equal(data.endog, y_kept.values)
    assert_series_equal(data.orig_endog, self.y.loc[keep])
    np.testing.assert_array_equal(data.exog, X_kept.values)
    assert_frame_equal(data.orig_exog, self.X.loc[keep])
def test_remove_data_pickle(self):
    """
    Check that `predict` gives the same output after `remove_data` and that
    the pickled results get smaller once the data is removed.
    """
    results = self.results
    xf = self.xf
    pred_kwds = self.predict_kwds
    pred1 = results.predict(xf, **pred_kwds)

    def _assert_same_as_pred1(pred):
        # compare with the helper that matches the type of both predictions
        both_series = (isinstance(pred1, pd.Series)
                       and isinstance(pred, pd.Series))
        both_frames = (isinstance(pred1, pd.DataFrame)
                       and isinstance(pred, pd.DataFrame))
        if both_series:
            assert_series_equal(pred1, pred)
        elif both_frames:
            assert_(pred1.equals(pred))
        else:
            np.testing.assert_equal(pred, pred1)

    # create some cached attributes
    results.summary()
    results.summary2()  # SMOKE test also summary2

    # uncomment the following to check whether tests run (7 failures now)
    # np.testing.assert_equal(res, 1)

    # check pickle unpickle works on full results
    # TODO: drop of load save is tested
    res, orig_nbytes = check_pickle(results._results)

    # remove data arrays, check predict still works
    if isinstance(results, glm.GLMResultsWrapper):
        with pytest.warns(FutureWarning, match="Anscombe residuals"):
            results.remove_data()
    else:
        results.remove_data()

    pred2 = results.predict(xf, **pred_kwds)
    _assert_same_as_pred1(pred2)

    # pickle and unpickle reduced array
    res, nbytes = check_pickle(results._results)

    # for testing attach res
    self.res = res
    assert_(nbytes < orig_nbytes,
            msg='pickle length not %d < %d' % (nbytes, orig_nbytes))

    pred3 = results.predict(xf, **pred_kwds)
    _assert_same_as_pred1(pred3)
def assert_objects_equal(actual: Any, expected: Any) -> None:
    """
    Compare two objects attribute by attribute (or key by key).

    The attribute dictionaries obtained with `vars` are compared; if `vars`
    raises ``TypeError`` for either argument, both arguments are used
    directly as mappings instead.  DataFrame values are compared with
    `assert_frame_equal`, Series values with `assert_series_equal` and
    everything else with `assert_equals`.
    """
    try:
        lhs, rhs = vars(actual), vars(expected)
    except TypeError:
        # no `__dict__` available: treat the arguments themselves as mappings
        lhs, rhs = actual, expected

    for name in set(lhs.keys()).union(rhs.keys()):
        got = lhs[name]
        want = rhs[name]
        # the comparison helper is chosen from the type of the actual value
        if isinstance(got, DataFrame):
            assert_frame_equal(got, want)
            continue
        if isinstance(got, Series):
            assert_series_equal(got, want)
            continue
        assert_equals(got, want)
def test_add_constant_zeros(self):
    """Check `add_constant` on inputs that contain all-zero columns."""
    # ndarray of zeros: the first output column is the constant
    zeros = np.zeros(100)
    with_const = tools.add_constant(zeros)
    assert_equal(with_const[:, 0], np.ones(100))

    # Series of zeros
    zero_series = pd.Series([0.0, 0.0, 0.0])
    with_const = tools.add_constant(zero_series)
    const_col = pd.Series([1.0, 1.0, 1.0], name='const')
    assert_series_equal(const_col, with_const['const'])

    # DataFrame whose first column is all zeros
    frame = pd.DataFrame([[0.0, 'a', 4], [0.0, 'bc', 9], [0.0, 'def', 16]])
    with_const = tools.add_constant(frame)
    expected = frame.copy()
    expected.insert(0, 'const', np.ones(3))
    assert_frame_equal(expected, with_const)

    # DataFrame whose last column is all zeros
    frame = pd.DataFrame([[1.0, 'a', 0], [0.0, 'bc', 0], [0.0, 'def', 0]])
    with_const = tools.add_constant(frame)
    expected = frame.copy()
    expected.insert(0, 'const', np.ones(3))
    assert_frame_equal(expected, with_const)
def test_add_constant_zeros(self):
    """Check `add_constant` on inputs that contain all-zero columns."""
    # ndarray of zeros: the first output column is the constant
    zeros = np.zeros(100)
    with_const = tools.add_constant(zeros)
    assert_equal(with_const[:, 0], np.ones(100))

    # Series of zeros
    zero_series = pd.Series([0.0, 0.0, 0.0])
    with_const = tools.add_constant(zero_series)
    const_col = pd.Series([1.0, 1.0, 1.0], name="const")
    assert_series_equal(const_col, with_const["const"])

    # DataFrame whose first column is all zeros
    frame = pd.DataFrame([[0.0, "a", 4], [0.0, "bc", 9], [0.0, "def", 16]])
    with_const = tools.add_constant(frame)
    expected = frame.copy()
    expected.insert(0, "const", np.ones(3))
    assert_frame_equal(expected, with_const)

    # DataFrame whose last column is all zeros
    frame = pd.DataFrame([[1.0, "a", 0], [0.0, "bc", 0], [0.0, "def", 0]])
    with_const = tools.add_constant(frame)
    expected = frame.copy()
    expected.insert(0, "const", np.ones(3))
    assert_frame_equal(expected, with_const)
def test_sort(self):
    """Check `grouping.sort` for DataFrame, 2d array, Series and 1d array."""
    check = np.testing.assert_

    # data frame
    frame_sorted, index = self.grouping.sort(self.data)
    frame_expected = self.data.sort_index()
    assert_frame_equal(frame_sorted, frame_expected)
    check(isinstance(frame_sorted, pd.DataFrame))
    check(not index.equals(self.grouping.index))

    # make sure it copied
    if hasattr(frame_sorted, 'equals'):  # newer pandas
        check(not frame_sorted.equals(self.data))

    # 2d arrays
    array_sorted, index = self.grouping.sort(self.data.values)
    np.testing.assert_array_equal(array_sorted, frame_expected.values)
    check(isinstance(array_sorted, np.ndarray))

    # 1d series
    series = self.data[self.data.columns[0]]
    series_sorted, index = self.grouping.sort(series)
    series_expected = series.sort_index()
    assert_series_equal(series_sorted, series_expected)
    check(isinstance(series_sorted, pd.Series))
    if hasattr(series_sorted, 'equals'):
        check(not series_sorted.equals(series))

    # 1d array
    values = series.values
    values_sorted, index = self.grouping.sort(values)
    values_expected = series.sort_index().values
    np.testing.assert_array_equal(values_sorted, values_expected)
    check(isinstance(values_sorted, np.ndarray))
def test_formula_predict_series():
    """Check the index of formula `predict` output for several input types."""
    frame = pd.DataFrame({"y": [1, 2, 3], "x": [1, 2, 3]}, index=[5, 3, 1])
    fit = ols('y ~ x', frame).fit()

    # DataFrame input and a single column of it: the frame's index is kept
    same_index = pd.Series([1., 2., 3.], index=[5, 3, 1])
    assert_series_equal(fit.predict(frame), same_index)
    assert_series_equal(fit.predict(frame.x), same_index)

    # named Series with its own index
    exog = pd.Series([1, 2, 3], index=[1, 2, 3], name='x')
    prediction = fit.predict(exog)
    assert_series_equal(prediction, pd.Series([1., 2., 3.], index=[1, 2, 3]))

    # plain dict: the expected index is 0, 1, 2
    prediction = fit.predict({"x": [1, 2, 3]})
    assert_series_equal(prediction, pd.Series([1., 2., 3.], index=[0, 1, 2]))
def test_defaults(revisions, updates):
    """
    Check that `news` gives the same output for every combination of
    data/results input and 'updated'/'previous' comparison type.
    """
    # Construct previous and updated datasets; with `updates` the previous
    # dataset stops one quarter earlier than the updated one
    infl = dta['infl'].copy()
    prev_end = '2009Q2' if updates else '2009Q3'
    endog1 = infl.loc[:prev_end].copy()
    endog2 = infl.loc[:'2009Q3'].copy()

    if revisions:
        # TODO: add test for only one of the variables revising?
        endog1.iloc[-1] = 0.

    # Get the previous and updated results objects
    mod1 = sarimax.SARIMAX(endog1)
    res1 = mod1.smooth([0.5, 1.0])
    mod2 = sarimax.SARIMAX(endog2)
    res2 = mod2.smooth([0.5, 1.0])

    # Compute the news from data and from results, in both directions
    news_updated_data = res1.news(endog2, comparison_type='updated')
    news_previous_data = res2.news(endog1, comparison_type='previous')
    news_updated_results = res1.news(res2, comparison_type='updated')
    news_previous_results = res2.news(res1, comparison_type='previous')
    others = (news_previous_data, news_updated_results,
              news_previous_results)

    attrs = [
        'total_impacts', 'update_impacts', 'revision_impacts', 'news',
        'weights', 'update_forecasts', 'update_realized',
        'prev_impacted_forecasts', 'post_impacted_forecasts',
        'revisions_iloc', 'revisions_ix', 'updates_iloc', 'updates_ix',
    ]
    for name in attrs:
        reference = getattr(news_updated_data, name)
        candidates = [getattr(other, name) for other in others]
        # the helper is chosen from the type of the first comparison value
        if isinstance(candidates[0], pd.Series):
            compare = assert_series_equal
        else:
            compare = assert_frame_equal
        for candidate in candidates:
            compare(reference, candidate)
def test_orig(self):
    """Check that the original endog and exog are stored unchanged."""
    data = self.data
    stored_endog = data.orig_endog
    assert_series_equal(stored_endog, self.endog)
    stored_exog = data.orig_exog
    assert_series_equal(stored_exog, self.exog)
def test_add_constant_series(self):
    """Check the 'const' column that `add_constant` adds for a Series."""
    series = pd.Series([1.0, 2.0, 3.0])
    with_const = tools.add_constant(series)

    const_col = pd.Series([1.0, 1.0, 1.0], name="const")
    assert_series_equal(const_col, with_const["const"])