def test_predict_missing(self):
        """Predict on data containing a NaN and check index and values.

        The first five rows of ``self.data`` are copied and the entry at
        row 0, column 1 is replaced by NaN. Predictions for the full copy
        must keep the copy's index, agree with predictions made without the
        first row, and have NaN as their first value.
        """
        subset = self.data[:5].copy()
        subset.iloc[0, 1] = np.nan
        with_missing = self.res.predict(subset)
        without_missing = self.res.predict(subset[1:])

        # The row holding the NaN must still be present in the output index.
        assert_index_equal(with_missing.index, subset.index)
        # All remaining rows are unaffected by the missing entry.
        assert_series_equal(with_missing[1:], without_missing)
        first_value = with_missing.values[0]
        assert_equal(first_value, np.nan)
Пример #2
0
def test_unobserved_components_time_varying(revisions, updates):
    """Compare `news` output of a time-varying and a time-invariant model.

    This is primarily a test that the `news` method works with a
    time-varying setup (i.e. time-varying state space matrices). An
    UnobservedComponents model with an `exog` term whose coefficient is set
    to zero is checked against the same model without the `exog` term.

    Parameters
    ----------
    revisions : bool
        If true, the last observation of the previous dataset is set to 0.
    updates : bool
        If true, the previous dataset ends at '2009Q2' rather than '2009Q3'.
    """
    # Construct previous (endog1) and updated (endog2) datasets
    endog = dta['infl'].copy()
    endog2 = endog.loc[:'2009Q3'].copy()
    if updates:
        endog1 = endog.loc[:'2009Q2'].copy()
        comparison_type = None
    else:
        endog1 = endog.loc[:'2009Q3'].copy()
        # Without updates and without NaN values, we need to specify that
        # the type of the comparison object that we're passing is "updated"
        comparison_type = 'updated'
    if revisions:
        endog1.iloc[-1] = 0.

    exog1 = np.ones_like(endog1)
    exog2 = np.ones_like(endog2)

    # Keyword arguments shared by both `news` calls
    news_kwargs = dict(start='2008Q1', end='2009Q3',
                       comparison_type=comparison_type)

    # Model with an exog term (so the model is time-varying), but with the
    # coefficient set to zero (so that it will be equivalent to the
    # time-invariant model)
    mod1 = structural.UnobservedComponents(endog1, 'llevel', exog=exog1)
    res1 = mod1.smooth([0.5, 0.2, 0.0])
    news1 = res1.news(endog2, exog=exog2, **news_kwargs)

    # Model without the exog term
    mod2 = structural.UnobservedComponents(endog1, 'llevel')
    res2 = mod2.smooth([0.5, 0.2])
    news2 = res2.news(endog2, **news_kwargs)

    attrs = (
        'total_impacts', 'update_impacts', 'revision_impacts', 'news',
        'weights', 'update_forecasts', 'update_realized',
        'prev_impacted_forecasts', 'post_impacted_forecasts', 'revisions_iloc',
        'revisions_ix', 'updates_iloc', 'updates_ix',
    )

    for attr in attrs:
        w = getattr(news1, attr)
        x = getattr(news2, attr)
        # Pick the comparison helper from the type of the reference output
        if isinstance(x, pd.Series):
            check = assert_series_equal
        else:
            check = assert_frame_equal
        check(w, x)
Пример #3
0
    def test_noop(self):
        """Check that ``missing='none'`` hands the data back unchanged.

        Three entries of the frame's underlying values are set to NaN, the
        first column is used as endog and the remaining columns as exog, and
        the output of ``handle_missing`` is compared with those same
        selections.
        """
        frame = make_dataframe()
        frame.values[[2, 5, 10], [2, 3, 1]] = np.nan
        endog_col = frame.columns[0]
        exog_cols = frame.columns[1:]
        data, _ = sm_data.handle_missing(frame[endog_col], frame[exog_cols],
                                         missing='none')

        expected_endog = frame[endog_col]
        expected_exog = frame[exog_cols]
        assert_frame_equal(data['exog'], expected_exog)
        assert_series_equal(data['endog'], expected_endog)
    def test_predict_missing(self):
        """Predict on data containing a NaN and check index and values.

        The first five rows of ``self.data`` are copied and the entry at
        row 0, column 1 is replaced by NaN. Predictions for the full copy
        must keep the copy's index, agree with predictions made without the
        first row, and have NaN as their first value.
        """
        subset = self.data[:5].copy()
        subset.iloc[0, 1] = np.nan
        with_missing = self.res.predict(subset)
        without_missing = self.res.predict(subset[1:])

        # The row holding the NaN must still be present in the output index.
        assert_index_equal(with_missing.index, subset.index)
        # All remaining rows are unaffected by the missing entry.
        assert_series_equal(with_missing[1:], without_missing)
        first_value = with_missing.values[0]
        assert_equal(first_value, np.nan)
Пример #5
0
def check_predict_types(results):
    """
    Verify what ``results.predict`` returns for array, list and pandas input.

    GLM and discrete results are only smoke tested. For every other results
    type the predictions for the first two exog rows are compared with the
    fitted values, and predictions from pandas input are compared with the
    array predictions wrapped in the matching pandas container.

    Parameters
    ----------
    results : Results
        Results object to check; may be a wrapper exposing ``_results``.

    Raises
    ------
    AssertionError
        If any of the comparisons fails.
    """
    wrapped = results
    # squeeze to make 1d for single regressor test case
    p_exog = np.squeeze(np.asarray(wrapped.model.exog[:2]))

    # ignore wrapper for isinstance check
    from statsmodels.genmod.generalized_linear_model import GLMResults
    from statsmodels.discrete.discrete_model import DiscreteResults
    from statsmodels.compat.pandas import assert_frame_equal, assert_series_equal

    # possibly unwrap -- GEE has no wrapper
    unwrapped = getattr(results, '_results', results)

    if isinstance(unwrapped, (GLMResults, DiscreteResults)):
        # SMOKE test only  TODO: mark this somehow
        wrapped.predict(p_exog)
        wrapped.predict(p_exog.tolist())
        wrapped.predict(p_exog[0].tolist())
        return

    tol = dict(rtol=1e-12)
    fitted = wrapped.fittedvalues[:2]
    assert_allclose(fitted, wrapped.predict(p_exog), **tol)
    # this needs reshape to column-vector:
    assert_allclose(fitted, wrapped.predict(np.squeeze(p_exog).tolist()),
                    **tol)
    # only one prediction:
    assert_allclose(fitted[:1], wrapped.predict(p_exog[0].tolist()), **tol)
    assert_allclose(fitted[:1], wrapped.predict(p_exog[0]), **tol)

    # Check that pandas wrapping works as expected
    row_index = range(len(p_exog))
    predicted = wrapped.predict(p_exog)

    if p_exog.ndim == 1:
        pandas_exog = pd.Series(p_exog, index=row_index)
    else:
        pandas_exog = pd.DataFrame(p_exog, index=row_index)
    predicted_pandas = wrapped.predict(pandas_exog)

    # predicted.ndim may not match p_exog.ndim because it may be squeezed
    #  if p_exog has only one column
    if predicted.ndim == 1:
        assert_series_equal(pd.Series(predicted, index=row_index),
                            predicted_pandas)
    else:
        assert_frame_equal(pd.DataFrame(predicted, index=row_index),
                           predicted_pandas)
Пример #6
0
 def test_detrend_series(self):
     """Detrend a named Series with order 1 and order 0.

     For each order the resulting values are compared with the expected
     numbers, and the result is compared with a Series rebuilt from its own
     values under the name 'one'.
     """
     series = pd.Series(self.data_1d, name='one')

     # order=1: expected values are all zeros
     detrended_1 = sm.tsa.detrend(series, order=1)
     assert_array_almost_equal(detrended_1.values, np.zeros_like(series))
     rebuilt_1 = pd.Series(detrended_1.values, name='one')
     assert_series_equal(detrended_1, rebuilt_1)

     # order=0: expected values are -2, -1, 0, 1, 2
     detrended_0 = sm.tsa.detrend(series, order=0)
     expected_0 = pd.Series([-2, -1, 0, 1, 2])
     assert_array_almost_equal(detrended_0.values, expected_0)
     rebuilt_0 = pd.Series(detrended_0.values, name='one')
     assert_series_equal(detrended_0, rebuilt_0)
Пример #7
0
def test_dynamic_factor_time_varying(revisions, updates):
    """Compare `news` output of a time-varying and a time-invariant model.

    This is primarily a test that the `news` method works with a
    time-varying setup (i.e. time-varying state space matrices). A
    DynamicFactor model with an `exog` term whose coefficients are set to
    zero is checked against the same model without the `exog` term.

    Parameters
    ----------
    revisions : bool
        If true, the last row of the previous dataset is set to 0.
    updates : bool
        If true, the previous dataset ends at '2009Q2' rather than '2009Q3'.
    """
    # Construct previous (endog1) and updated (endog2) datasets
    endog = dta[['realgdp', 'unemp']].copy()
    endog['realgdp'] = np.log(endog['realgdp']).diff() * 400
    endog = endog.iloc[1:]
    endog2 = endog.loc[:'2009Q3'].copy()
    if updates:
        endog1 = endog.loc[:'2009Q2'].copy()
        comparison_type = None
    else:
        endog1 = endog.loc[:'2009Q3'].copy()
        # Without updates and without NaN values, we need to specify that
        # the type of the comparison object that we're passing is "updated"
        comparison_type = 'updated'
    if revisions:
        # TODO: add test for only one of the variables revising?
        endog1.iloc[-1] = 0.

    exog1 = np.ones_like(endog1['realgdp'])
    exog2 = np.ones_like(endog2['realgdp'])
    params1 = np.r_[0.9, 0.2, 0.0, 0.0, 1.2, 1.1, 0.5, 0.2]
    params2 = np.r_[0.9, 0.2, 1.2, 1.1, 0.5, 0.2]

    # Keyword arguments shared by both `news` calls
    news_kwargs = dict(start='2008Q1', end='2009Q3',
                       comparison_type=comparison_type)

    # Model with an exog term (so the model is time-varying), but with the
    # coefficient set to zero (so that it will be equivalent to the
    # time-invariant model)
    mod1 = dynamic_factor.DynamicFactor(endog1, exog=exog1,
                                        k_factors=1, factor_order=2)
    res1 = mod1.smooth(params1)
    news1 = res1.news(endog2, exog=exog2, **news_kwargs)

    # Model without the exog term
    mod2 = dynamic_factor.DynamicFactor(endog1, k_factors=1, factor_order=2)
    res2 = mod2.smooth(params2)
    news2 = res2.news(endog2, **news_kwargs)

    attrs = (
        'total_impacts', 'update_impacts', 'revision_impacts', 'news',
        'weights', 'update_forecasts', 'update_realized',
        'prev_impacted_forecasts', 'post_impacted_forecasts',
        'revisions_iloc', 'revisions_ix', 'updates_iloc', 'updates_ix',
    )

    for attr in attrs:
        w = getattr(news1, attr)
        x = getattr(news2, attr)
        # Pick the comparison helper from the type of the reference output
        if isinstance(x, pd.Series):
            check = assert_series_equal
        else:
            check = assert_frame_equal
        check(w, x)
Пример #8
0
    def test_pandas_array(self):
        """Check ``missing='drop'`` with a Series endog and an ndarray exog.

        Three entries of the frame's underlying values are set to NaN. The
        output of ``handle_missing`` is compared with the corresponding
        selections of the frame after ``dropna``.
        """
        frame = make_dataframe()
        frame.values[[2, 5, 10], [2, 3, 1]] = np.nan
        endog = frame[frame.columns[0]]
        exog = frame[frame.columns[1:]].values
        data, _ = sm_data.handle_missing(endog, exog, missing='drop')

        complete = frame.dropna()
        expected_endog = complete[complete.columns[0]]
        expected_exog = complete[complete.columns[1:]].values
        np.testing.assert_array_equal(data['exog'], expected_exog)
        assert_series_equal(data['endog'], expected_endog)
Пример #9
0
def test__ros_group_rank():
    """Check ``ros._ros_group_rank`` on a twelve-row frame.

    The frame has a constant 'dl_idx' column and a 'params' column with
    repeated letters; the result, cast to int, is compared with a
    hard-coded Series named 'rank'.
    """
    n_rows = 12
    param_labels = list('AABCCCDE') + list('DCBA')
    df = pandas.DataFrame({
        'dl_idx': [1] * n_rows,
        'params': param_labels,
        'values': list(range(n_rows))
    })

    expected_ranks = [1, 2, 1, 1, 2, 3, 1, 1, 2, 4, 2, 3]
    result = ros._ros_group_rank(df, 'dl_idx', 'params')
    expected = pandas.Series(expected_ranks, name='rank')
    assert_series_equal(result.astype(int), expected.astype(int))
Пример #10
0
 def test_attach(self):
     """Compare ``wrap_output`` results for 'columns', 'rows' and 'cov'."""
     data = self.data
     # this makes sure what the wrappers need work but not the wrapped
     # results themselves
     wrapped_cols = data.wrap_output(self.col_input, 'columns')
     assert_series_equal(wrapped_cols, self.col_result)

     wrapped_rows = data.wrap_output(self.row_input, 'rows')
     assert_series_equal(wrapped_rows, self.row_result)

     wrapped_cov = data.wrap_output(self.cov_input, 'cov')
     assert_frame_equal(wrapped_cov, self.cov_result)
    def test_predict_types(self):
        """Check ``predict`` with array, list and pandas input.

        GLM and discrete results are only smoke tested. For every other
        results type the predictions for the first two exog rows are
        compared with the fitted values, and predictions made from pandas
        input must come back as the matching pandas container with the
        same values as the array predictions.
        """
        res = self.results
        # squeeze to make 1d for single regressor test case
        p_exog = np.squeeze(np.asarray(res.model.exog[:2]))

        # ignore wrapper for isinstance check
        from statsmodels.genmod.generalized_linear_model import GLMResults
        from statsmodels.discrete.discrete_model import DiscreteResults

        # FIXME: work around GEE has no wrapper
        results = getattr(self.results, '_results', self.results)

        if isinstance(results, (GLMResults, DiscreteResults)):
            # SMOKE test only  TODO
            res.predict(p_exog)
            res.predict(p_exog.tolist())
            res.predict(p_exog[0].tolist())
        else:
            # `pandas.util.testing` was deprecated in pandas 1.0 and removed
            # in pandas 2.0; prefer the public `pandas.testing` module and
            # only fall back to the old location on very old pandas.
            try:
                from pandas.testing import assert_series_equal
            except ImportError:
                from pandas.util.testing import assert_series_equal

            fitted = res.fittedvalues[:2]
            assert_allclose(fitted, res.predict(p_exog), rtol=1e-12)
            # this needs reshape to column-vector:
            assert_allclose(fitted,
                            res.predict(np.squeeze(p_exog).tolist()),
                            rtol=1e-12)
            # only one prediction:
            assert_allclose(fitted[:1],
                            res.predict(p_exog[0].tolist()),
                            rtol=1e-12)
            assert_allclose(fitted[:1], res.predict(p_exog[0]), rtol=1e-12)

            exog_index = range(len(p_exog))
            predicted = res.predict(p_exog)

            # Feed the same rows as a pandas object of matching dimension.
            if p_exog.ndim == 1:
                predicted_pandas = res.predict(
                    pd.Series(p_exog, index=exog_index))
            else:
                predicted_pandas = res.predict(
                    pd.DataFrame(p_exog, index=exog_index))

            # The container type is chosen from the array prediction, which
            # need not have the same number of dimensions as p_exog.
            if predicted.ndim == 1:
                assert_(isinstance(predicted_pandas, pd.Series))
                predicted_expected = pd.Series(predicted, index=exog_index)
                assert_series_equal(predicted_expected, predicted_pandas)

            else:
                assert_(isinstance(predicted_pandas, pd.DataFrame))
                predicted_expected = pd.DataFrame(predicted, index=exog_index)
                assert_(predicted_expected.equals(predicted_pandas))
Пример #12
0
    def test_predict_types(self):
        """Check ``predict`` with array, list and pandas input.

        GLM and discrete results are only smoke tested. For every other
        results type the predictions for the first two exog rows are
        compared with the fitted values, and predictions made from pandas
        input must come back as the matching pandas container with the
        same values as the array predictions.
        """
        res = self.results
        # squeeze to make 1d for single regressor test case
        p_exog = np.squeeze(np.asarray(res.model.exog[:2]))

        # ignore wrapper for isinstance check
        from statsmodels.genmod.generalized_linear_model import GLMResults
        from statsmodels.discrete.discrete_model import DiscreteResults

        # FIXME: work around GEE has no wrapper
        results = getattr(self.results, '_results', self.results)

        if isinstance(results, (GLMResults, DiscreteResults)):
            # SMOKE test only  TODO
            res.predict(p_exog)
            res.predict(p_exog.tolist())
            res.predict(p_exog[0].tolist())
        else:
            # `pandas.util.testing` was deprecated in pandas 1.0 and removed
            # in pandas 2.0; prefer the public `pandas.testing` module and
            # only fall back to the old location on very old pandas.
            try:
                from pandas.testing import assert_series_equal
            except ImportError:
                from pandas.util.testing import assert_series_equal

            fitted = res.fittedvalues[:2]
            assert_allclose(fitted, res.predict(p_exog), rtol=1e-12)
            # this needs reshape to column-vector:
            assert_allclose(fitted, res.predict(np.squeeze(p_exog).tolist()),
                            rtol=1e-12)
            # only one prediction:
            assert_allclose(fitted[:1], res.predict(p_exog[0].tolist()),
                            rtol=1e-12)
            assert_allclose(fitted[:1], res.predict(p_exog[0]),
                            rtol=1e-12)

            exog_index = range(len(p_exog))
            predicted = res.predict(p_exog)

            # Feed the same rows as a pandas object of matching dimension.
            if p_exog.ndim == 1:
                predicted_pandas = res.predict(pd.Series(p_exog,
                                                         index=exog_index))
            else:
                predicted_pandas = res.predict(pd.DataFrame(p_exog,
                                                            index=exog_index))

            # The container type is chosen from the array prediction, which
            # need not have the same number of dimensions as p_exog.
            if predicted.ndim == 1:
                assert_(isinstance(predicted_pandas, pd.Series))
                predicted_expected = pd.Series(predicted, index=exog_index)
                assert_series_equal(predicted_expected, predicted_pandas)

            else:
                assert_(isinstance(predicted_pandas, pd.DataFrame))
                predicted_expected = pd.DataFrame(predicted, index=exog_index)
                assert_(predicted_expected.equals(predicted_pandas))
Пример #13
0
 def test_attach(self):
     """Compare ``wrap_output`` results for each supported wrapping kind.

     Covers 'columns', 'rows', 'cov', 'cov_eq' and 'columns_eq'.
     """
     data = self.data

     wrapped_cols = data.wrap_output(self.col_input, 'columns')
     assert_series_equal(wrapped_cols, self.col_result)

     wrapped_rows = data.wrap_output(self.row_input, 'rows')
     assert_series_equal(wrapped_rows, self.row_result)

     wrapped_cov = data.wrap_output(self.cov_input, 'cov')
     assert_frame_equal(wrapped_cov, self.cov_result)

     wrapped_cov_eq = data.wrap_output(self.cov_eq_input, 'cov_eq')
     assert_frame_equal(wrapped_cov_eq, self.cov_eq_result)

     wrapped_col_eq = data.wrap_output(self.col_eq_input, 'columns_eq')
     assert_frame_equal(wrapped_col_eq, self.col_eq_result)
Пример #14
0
def test_categorical_series(string_var):
    """Check ``tools.categorical`` on a string Series.

    With ``drop=True`` the output is compared with ``pd.get_dummies``. With
    ``drop=False`` the first five columns are compared with the dummies and
    the sixth with the input. With ``dictnames=True`` the returned mapping
    must map each position to the category at that position.
    """
    expected_dummies = pd.get_dummies(pd.Categorical(string_var))

    dropped = tools.categorical(string_var, drop=True)
    assert_frame_equal(dropped, expected_dummies)

    kept = tools.categorical(string_var, drop=False)
    # Replace the column index of the dummies by a plain list of its labels
    expected_dummies.columns = list(expected_dummies.columns)
    assert_frame_equal(kept.iloc[:, :5], expected_dummies)
    assert_series_equal(kept.iloc[:, 5], string_var)

    _, dictnames = tools.categorical(string_var, drop=False, dictnames=True)
    categories = pd.Categorical(string_var).categories
    for position, category in enumerate(categories):
        assert position in dictnames
        assert dictnames[position] == category
Пример #15
0
 def test_drop(self):
     """Check ``handle_data`` with 'drop' against manually filtered data.

     Rows of ``self.y`` / ``self.X`` that contain a NaN in either object are
     removed by hand, and the result is compared with both the array and the
     ``orig_*`` attributes of the handled data.
     """
     stacked = np.c_[self.y, self.X]
     has_nan = np.isnan(stacked).any(axis=1)
     keep = ~has_nan
     y_kept = self.y.loc[keep]
     X_kept = self.X.loc[keep]

     data = sm_data.handle_data(self.y, self.X, 'drop')

     np.testing.assert_array_equal(data.endog, y_kept.values)
     assert_series_equal(data.orig_endog, self.y.loc[keep])
     np.testing.assert_array_equal(data.exog, X_kept.values)
     assert_frame_equal(data.orig_exog, self.X.loc[keep])
    def test_remove_data_pickle(self):
        """Check that ``remove_data`` shrinks the pickle and keeps ``predict``.

        Predictions are computed before ``remove_data``, after it, and after
        pickling the reduced results; all three must agree. The pickled
        size after ``remove_data`` must be smaller than before.
        """

        def _assert_same_prediction(reference, candidate):
            # Compare with the helper matching the container type of both.
            both_series = (isinstance(reference, pd.Series)
                           and isinstance(candidate, pd.Series))
            both_frames = (isinstance(reference, pd.DataFrame)
                           and isinstance(candidate, pd.DataFrame))
            if both_series:
                assert_series_equal(reference, candidate)
            elif both_frames:
                assert_(reference.equals(candidate))
            else:
                np.testing.assert_equal(candidate, reference)

        results = self.results
        xf = self.xf
        pred_kwds = self.predict_kwds
        pred1 = results.predict(xf, **pred_kwds)
        # create some cached attributes
        results.summary()
        results.summary2()  # SMOKE test also summary2

        # check pickle unpickle works on full results
        # TODO: drop of load save is tested
        res, orig_nbytes = check_pickle(results._results)

        # remove data arrays, check predict still works
        if isinstance(results, glm.GLMResultsWrapper):
            with pytest.warns(FutureWarning, match="Anscombe residuals"):
                results.remove_data()
        else:
            results.remove_data()

        pred2 = results.predict(xf, **pred_kwds)
        _assert_same_prediction(pred1, pred2)

        # pickle and unpickle reduced array
        res, nbytes = check_pickle(results._results)

        # for testing attach res
        self.res = res

        assert_(nbytes < orig_nbytes,
                msg='pickle length not %d < %d' % (nbytes, orig_nbytes))

        pred3 = results.predict(xf, **pred_kwds)
        _assert_same_prediction(pred1, pred3)
Пример #17
0
def assert_objects_equal(actual: Any, expected: Any) -> None:
    """Assert that two objects hold equal values under the same keys.

    The attributes of both objects are read with ``vars``. If that fails
    for either of them, both are used directly as mappings instead.
    DataFrame values are compared with ``assert_frame_equal``, Series values
    with ``assert_series_equal`` and everything else with ``assert_equals``.

    Parameters
    ----------
    actual : Any
        Object (or mapping) under test.
    expected : Any
        Object (or mapping) holding the reference values.

    Raises
    ------
    AssertionError
        If the two sides do not have the same set of keys, or if any pair of
        values differs.
    """
    try:
        actual_attrs = vars(actual)
        expected_attrs = vars(expected)
    except TypeError:
        # No ``__dict__`` available: treat both arguments as mappings.
        actual_attrs = actual
        expected_attrs = expected

    actual_keys = set(actual_attrs.keys())
    expected_keys = set(expected_attrs.keys())
    if actual_keys != expected_keys:
        # A key present on one side only is a mismatch; report it as an
        # assertion failure rather than letting the lookup below raise
        # KeyError. Keys are sorted by repr so mixed key types cannot break
        # the message.
        missing = sorted(repr(key) for key in expected_keys - actual_keys)
        unexpected = sorted(repr(key) for key in actual_keys - expected_keys)
        raise AssertionError(
            "key sets differ; missing from actual: [%s]; "
            "not in expected: [%s]"
            % (", ".join(missing), ", ".join(unexpected))
        )

    for key in actual_keys:
        actual_value = actual_attrs[key]
        expected_value = expected_attrs[key]

        # The comparison helper is chosen from the type of the actual value.
        if isinstance(actual_value, DataFrame):
            assert_frame_equal(actual_value, expected_value)
        elif isinstance(actual_value, Series):
            assert_series_equal(actual_value, expected_value)
        else:
            assert_equals(actual_value, expected_value)
Пример #18
0
    def test_add_constant_zeros(self):
        """Check ``add_constant`` on inputs that contain all-zero columns.

        Covers a 1d array of zeros, a Series of zeros and two mixed-type
        DataFrames; in each case a column of ones is expected in front.
        """
        # 1d array of zeros
        zeros = np.zeros(100)
        with_const = tools.add_constant(zeros)
        assert_equal(with_const[:, 0], np.ones(100))

        # Series of zeros
        zero_series = pd.Series([0.0, 0.0, 0.0])
        with_const = tools.add_constant(zero_series)
        expected_const = pd.Series([1.0, 1.0, 1.0], name='const')
        assert_series_equal(expected_const, with_const['const'])

        # DataFrame whose first column is all zeros
        frame = pd.DataFrame([[0.0, 'a', 4], [0.0, 'bc', 9], [0.0, 'def', 16]])
        with_const = tools.add_constant(frame)
        expected_frame = frame.copy()
        expected_frame.insert(0, 'const', np.ones(3))
        assert_frame_equal(expected_frame, with_const)

        # DataFrame whose last column is all zeros
        frame = pd.DataFrame([[1.0, 'a', 0], [0.0, 'bc', 0], [0.0, 'def', 0]])
        with_const = tools.add_constant(frame)
        expected_frame = frame.copy()
        expected_frame.insert(0, 'const', np.ones(3))
        assert_frame_equal(expected_frame, with_const)
Пример #19
0
    def test_add_constant_zeros(self):
        """Check ``add_constant`` on inputs that contain all-zero columns.

        Covers a 1d array of zeros, a Series of zeros and two mixed-type
        DataFrames; in each case a column of ones is expected in front.
        """
        # 1d array of zeros
        zeros = np.zeros(100)
        with_const = tools.add_constant(zeros)
        assert_equal(with_const[:, 0], np.ones(100))

        # Series of zeros
        zero_series = pd.Series([0.0, 0.0, 0.0])
        with_const = tools.add_constant(zero_series)
        expected_const = pd.Series([1.0, 1.0, 1.0], name="const")
        assert_series_equal(expected_const, with_const["const"])

        # DataFrame whose first column is all zeros
        frame = pd.DataFrame([[0.0, "a", 4], [0.0, "bc", 9], [0.0, "def", 16]])
        with_const = tools.add_constant(frame)
        expected_frame = frame.copy()
        expected_frame.insert(0, "const", np.ones(3))
        assert_frame_equal(expected_frame, with_const)

        # DataFrame whose last column is all zeros
        frame = pd.DataFrame([[1.0, "a", 0], [0.0, "bc", 0], [0.0, "def", 0]])
        with_const = tools.add_constant(frame)
        expected_frame = frame.copy()
        expected_frame.insert(0, "const", np.ones(3))
        assert_frame_equal(expected_frame, with_const)
Пример #20
0
    def test_sort(self):
        """Check ``grouping.sort`` for frame, 2d array, Series and 1d array.

        Each result is compared with the ``sort_index`` output of the
        corresponding pandas object, and its container type is checked.
        """
        # data frame
        frame_sorted, new_index = self.grouping.sort(self.data)
        frame_expected = self.data.sort_index()

        assert_frame_equal(frame_sorted, frame_expected)
        np.testing.assert_(isinstance(frame_sorted, pd.DataFrame))
        np.testing.assert_(not new_index.equals(self.grouping.index))

        # make sure it copied
        if hasattr(frame_sorted, 'equals'):  # newer pandas
            np.testing.assert_(not frame_sorted.equals(self.data))

        # 2d arrays
        array2d_sorted, new_index = self.grouping.sort(self.data.values)
        np.testing.assert_array_equal(array2d_sorted, frame_expected.values)
        np.testing.assert_(isinstance(array2d_sorted, np.ndarray))

        # 1d series
        first_column = self.data.columns[0]
        series = self.data[first_column]
        series_sorted, new_index = self.grouping.sort(series)

        series_expected = series.sort_index()
        assert_series_equal(series_sorted, series_expected)
        np.testing.assert_(isinstance(series_sorted, pd.Series))
        if hasattr(series_sorted, 'equals'):
            np.testing.assert_(not series_sorted.equals(series))

        # 1d array
        array1d = series.values
        array1d_sorted, new_index = self.grouping.sort(array1d)

        array1d_expected = series.sort_index().values
        np.testing.assert_array_equal(array1d_sorted, array1d_expected)
        np.testing.assert_(isinstance(array1d_sorted, np.ndarray))
Пример #21
0
def test_formula_predict_series():
    """Check the index of formula predictions for several input types.

    A model ``y ~ x`` is fitted on a frame indexed by [5, 3, 1]; predictions
    are then requested from the frame, one of its columns, a separately
    indexed Series and a plain dict.
    """
    values = [1., 2., 3.]
    data = pd.DataFrame({"y": [1, 2, 3], "x": [1, 2, 3]}, index=[5, 3, 1])
    results = ols('y ~ x', data).fit()

    # Full frame and single column: the original index is kept
    expected = pd.Series(values, index=[5, 3, 1])
    from_frame = results.predict(data)
    assert_series_equal(from_frame, expected)

    from_column = results.predict(data.x)
    assert_series_equal(from_column, expected)

    # Named Series with its own index
    new_x = pd.Series([1, 2, 3], index=[1, 2, 3], name='x')
    from_series = results.predict(new_x)
    expected = pd.Series(values, index=[1, 2, 3])
    assert_series_equal(from_series, expected)

    # Dict input: expected index is 0, 1, 2
    from_dict = results.predict({"x": [1, 2, 3]})
    expected = pd.Series(values, index=[0, 1, 2])
    assert_series_equal(from_dict, expected)
Пример #22
0
def test_defaults(revisions, updates):
    """Check that four equivalent ways of calling `news` agree.

    `news` is computed from the previous results given the updated data,
    from the updated results given the previous data, and from each results
    object given the other results object; all outputs must be equal.

    Parameters
    ----------
    revisions : bool
        If true, the last observation of the previous dataset is set to 0.
    updates : bool
        If true, the previous dataset ends at '2009Q2' rather than '2009Q3'.
    """
    # Construct previous (endog1) and updated (endog2) datasets
    endog = dta['infl'].copy()
    previous_end = '2009Q2' if updates else '2009Q3'
    endog1 = endog.loc[:previous_end].copy()
    endog2 = endog.loc[:'2009Q3'].copy()
    if revisions:
        # TODO: add test for only one of the variables revising?
        endog1.iloc[-1] = 0.

    # Smooth both datasets with the same parameters
    params = [0.5, 1.0]
    res1 = sarimax.SARIMAX(endog1).smooth(params)
    res2 = sarimax.SARIMAX(endog2).smooth(params)

    news_updated_data = res1.news(endog2, comparison_type='updated')
    news_previous_data = res2.news(endog1, comparison_type='previous')
    news_updated_results = res1.news(res2, comparison_type='updated')
    news_previous_results = res2.news(res1, comparison_type='previous')

    attrs = (
        'total_impacts', 'update_impacts', 'revision_impacts', 'news',
        'weights', 'update_forecasts', 'update_realized',
        'prev_impacted_forecasts', 'post_impacted_forecasts', 'revisions_iloc',
        'revisions_ix', 'updates_iloc', 'updates_ix',
    )

    for attr in attrs:
        w = getattr(news_updated_data, attr)
        x = getattr(news_previous_data, attr)
        y = getattr(news_updated_results, attr)
        z = getattr(news_previous_results, attr)
        # Pick the comparison helper from the type of one of the outputs
        if isinstance(x, pd.Series):
            check = assert_series_equal
        else:
            check = assert_frame_equal
        for other in (x, y, z):
            check(w, other)
Пример #23
0
 def test_orig(self):
     """Compare ``orig_endog`` / ``orig_exog`` with the stored inputs."""
     handled = self.data
     assert_series_equal(handled.orig_endog, self.endog)
     assert_series_equal(handled.orig_exog, self.exog)
Пример #24
0
 def test_add_constant_series(self):
     """Check that ``add_constant`` on a Series yields a 'const' column of ones."""
     series = pd.Series([1.0, 2.0, 3.0])
     with_const = tools.add_constant(series)
     ones = pd.Series([1.0, 1.0, 1.0], name="const")
     assert_series_equal(ones, with_const["const"])