Пример #1
0
def test_pandas_dates():

    data = [988, 819, 964]
    dates = [
        '2016-01-01 12:00:00', '2016-02-01 12:00:00', '2016-03-01 12:00:00'
    ]

    datetime_dates = pd.to_datetime(dates)

    result = pd.Series(data=data, index=datetime_dates, name='price')
    df = pd.DataFrame(data={'price': data},
                      index=pd.DatetimeIndex(dates, freq='MS'))

    model = TimeSeriesModel(df['price'])

    assert_equal(model.data.dates, result.index)
Пример #2
0
def test_predict_freq():
    # test that predicted dates have same frequency
    x = np.arange(1, 36.)

    # there's a bug in pandas up to 0.10.2 for YearBegin
    #dates = date_range("1972-4-1", "2007-4-1", freq="AS-APR")
    dates = pd.date_range("1972-4-30", "2006-4-30", freq="A-APR")
    series = pd.Series(x, index=dates)
    model = TimeSeriesModel(series)
    #npt.assert_(model.data.freq == "AS-APR")
    assert_equal(model._index.freqstr, "A-APR")

    start, end, out_of_sample, _ = (model._get_prediction_index(
        "2006-4-30", "2016-4-30"))

    predict_dates = model.data.predict_dates

    #expected_dates = date_range("2006-12-31", "2016-12-31",
    #                            freq="AS-APR")
    expected_dates = pd.date_range("2006-4-30", "2016-4-30", freq="A-APR")
    assert_equal(predict_dates, expected_dates)
Пример #3
0
def test_pandas_nodates_index():

    data = [988, 819, 964]
    dates = ['a', 'b', 'c']
    s = pd.Series(data, index=dates)

    # TODO: Remove this, this is now valid
    # npt.assert_raises(ValueError, TimeSeriesModel, s)

    # Test with a non-date index that doesn't raise an exception because it
    # can be coerced into a nanosecond DatetimeIndex
    data = [988, 819, 964]
    # index=pd.date_range('1970-01-01', periods=3, freq='QS')
    index = pd.to_datetime([100, 101, 102])
    s = pd.Series(data, index=index)

    actual_str = (index[0].strftime('%Y-%m-%d %H:%M:%S.%f') +
                  str(index[0].value))
    assert_equal(actual_str, '1970-01-01 00:00:00.000000100')
    mod = TimeSeriesModel(s)
    start, end, out_of_sample, _ = mod._get_prediction_index(0, 4)
    assert_equal(len(mod.data.predict_dates), 5)
Пример #4
0
def test_predict_freq():
    # test that predicted dates have same frequency
    x = np.arange(1, 36.)

    # there's a bug in pandas up to 0.10.2 for YearBegin
    #dates = date_range("1972-4-1", "2007-4-1", freq="AS-APR")
    dates = date_range("1972-4-30", "2006-4-30", freq="A-APR")
    series = Series(x, index=dates)
    model = TimeSeriesModel(series)
    #npt.assert_(model.data.freq == "AS-APR")
    npt.assert_(model.data.freq == "A-APR")

    start = model._get_predict_start("2006-4-30")
    end = model._get_predict_end("2016-4-30")
    model._make_predict_dates()

    predict_dates = model.data.predict_dates

    #expected_dates = date_range("2006-12-31", "2016-12-31",
    #                            freq="AS-APR")
    expected_dates = date_range("2006-4-30", "2016-4-30", freq="A-APR")
    npt.assert_equal(predict_dates, expected_dates)
Пример #5
0
def test_pandas_nodates_index():

    data = [988, 819, 964]
    dates = ['a', 'b', 'c']
    s = pd.Series(data, index=dates)

    npt.assert_raises(ValueError, TimeSeriesModel, s)

    # Test with a non-date index that doesn't raise an exception because it
    # can be coerced into a nanosecond DatetimeIndex
    # (This test doesn't make sense for Numpy < 1.7 since they don't have
    # nanosecond support)
    # (This test also doesn't make sense for Pandas < 0.14 since we don't
    # support nanosecond index in Pandas < 0.14)
    try:
        # Check for Numpy < 1.7
        _freq_to_pandas['N']
    except:
        pass
    else:
        data = [988, 819, 964]
        # index=pd.date_range('1970-01-01', periods=3, freq='QS')
        index = pd.to_datetime([100, 101, 102])
        s = pd.Series(data, index=index)

        # Alternate test for Pandas < 0.14
        from distutils.version import LooseVersion
        from pandas import __version__ as pd_version
        if LooseVersion(pd_version) < '0.14':
            assert_raises(NotImplementedError, TimeSeriesModel, s)
        else:
            actual_str = (index[0].strftime('%Y-%m-%d %H:%M:%S.%f') +
                          str(index[0].value))
            assert_equal(actual_str, '1970-01-01 00:00:00.000000100')
            mod = TimeSeriesModel(s)
            start = mod._get_predict_start(0)
            end, out_of_sample = mod._get_predict_end(4)
            mod._make_predict_dates()
            assert_equal(len(mod.data.predict_dates), 5)
Пример #6
0
    def __init__(self,
                 endog=None,
                 exog=None,
                 order=None,
                 seasonal_order=None,
                 ar_order=None,
                 diff=None,
                 ma_order=None,
                 seasonal_ar_order=None,
                 seasonal_diff=None,
                 seasonal_ma_order=None,
                 seasonal_periods=None,
                 trend=None,
                 enforce_stationarity=None,
                 enforce_invertibility=None,
                 concentrate_scale=None,
                 trend_offset=1,
                 dates=None,
                 freq=None,
                 missing='none',
                 validate_specification=True):

        # Basic parameters
        self.enforce_stationarity = enforce_stationarity
        self.enforce_invertibility = enforce_invertibility
        self.concentrate_scale = concentrate_scale
        self.trend_offset = trend_offset

        # Validate that we were not given conflicting specifications
        has_order = order is not None
        has_specific_order = (ar_order is not None or diff is not None
                              or ma_order is not None)
        has_seasonal_order = seasonal_order is not None
        has_specific_seasonal_order = (seasonal_ar_order is not None
                                       or seasonal_diff is not None
                                       or seasonal_ma_order is not None
                                       or seasonal_periods is not None)
        if has_order and has_specific_order:
            raise ValueError('Cannot specify both `order` and either of'
                             ' `ar_order` or `ma_order`.')
        if has_seasonal_order and has_specific_seasonal_order:
            raise ValueError('Cannot specify both `seasonal_order` and any of'
                             ' `seasonal_ar_order`, `seasonal_ma_order`,'
                             ' or `seasonal_periods`.')

        # Compute `order`
        if has_specific_order:
            ar_order = 0 if ar_order is None else ar_order
            diff = 0 if diff is None else diff
            ma_order = 0 if ma_order is None else ma_order
            order = (ar_order, diff, ma_order)
        elif not has_order:
            order = (0, 0, 0)

        # Compute `seasonal_order`
        if has_specific_seasonal_order:
            seasonal_ar_order = (0 if seasonal_ar_order is None else
                                 seasonal_ar_order)
            seasonal_diff = 0 if seasonal_diff is None else seasonal_diff
            seasonal_ma_order = (0 if seasonal_ma_order is None else
                                 seasonal_ma_order)
            seasonal_periods = (0 if seasonal_periods is None else
                                seasonal_periods)
            seasonal_order = (seasonal_ar_order, seasonal_diff,
                              seasonal_ma_order, seasonal_periods)
        elif not has_seasonal_order:
            seasonal_order = (0, 0, 0, 0)

        # Validate shapes of `order`, `seasonal_order`
        if len(order) != 3:
            raise ValueError('`order` argument must be an iterable with three'
                             ' elements.')
        if len(seasonal_order) != 4:
            raise ValueError('`seasonal_order` argument must be an iterable'
                             ' with four elements.')

        # Validate differencing parameters
        if validate_specification:
            if order[1] < 0:
                raise ValueError('Cannot specify negative differencing.')
            if order[1] != int(order[1]):
                raise ValueError('Cannot specify fractional differencing.')
            if seasonal_order[1] < 0:
                raise ValueError('Cannot specify negative seasonal'
                                 ' differencing.')
            if seasonal_order[1] != int(seasonal_order[1]):
                raise ValueError('Cannot specify fractional seasonal'
                                 ' differencing.')
            if seasonal_order[3] < 0:
                raise ValueError('Cannot specify negative seasonal'
                                 ' periodicity.')

        # Standardize to integers or lists of integers
        order = (standardize_lag_order(order[0], 'AR'), int(order[1]),
                 standardize_lag_order(order[2], 'MA'))
        seasonal_order = (standardize_lag_order(seasonal_order[0],
                                                'seasonal AR'),
                          int(seasonal_order[1]),
                          standardize_lag_order(seasonal_order[2],
                                                'seasonal MA'),
                          int(seasonal_order[3]))

        # Validate seasonals
        if validate_specification:
            if seasonal_order[3] == 1:
                raise ValueError('Seasonal periodicity must be greater'
                                 ' than 1.')
            if ((seasonal_order[0] != 0 or seasonal_order[1] != 0
                 or seasonal_order[2] != 0) and seasonal_order[3] == 0):
                raise ValueError('Must include nonzero seasonal periodicity if'
                                 ' including seasonal AR, MA, or'
                                 ' differencing.')

        # Basic order
        self.order = order
        self.ar_order, self.diff, self.ma_order = order

        self.seasonal_order = seasonal_order
        (self.seasonal_ar_order, self.seasonal_diff, self.seasonal_ma_order,
         self.seasonal_periods) = seasonal_order

        # Lists of included lags
        if isinstance(self.ar_order, list):
            self.ar_lags = self.ar_order
        else:
            self.ar_lags = np.arange(1, self.ar_order + 1).tolist()
        if isinstance(self.ma_order, list):
            self.ma_lags = self.ma_order
        else:
            self.ma_lags = np.arange(1, self.ma_order + 1).tolist()

        if isinstance(self.seasonal_ar_order, list):
            self.seasonal_ar_lags = self.seasonal_ar_order
        else:
            self.seasonal_ar_lags = (np.arange(1, self.seasonal_ar_order +
                                               1).tolist())
        if isinstance(self.seasonal_ma_order, list):
            self.seasonal_ma_lags = self.seasonal_ma_order
        else:
            self.seasonal_ma_lags = (np.arange(1, self.seasonal_ma_order +
                                               1).tolist())

        # Maximum lag orders
        self.max_ar_order = self.ar_lags[-1] if self.ar_lags else 0
        self.max_ma_order = self.ma_lags[-1] if self.ma_lags else 0

        self.max_seasonal_ar_order = (self.seasonal_ar_lags[-1]
                                      if self.seasonal_ar_lags else 0)
        self.max_seasonal_ma_order = (self.seasonal_ma_lags[-1]
                                      if self.seasonal_ma_lags else 0)

        self.max_reduced_ar_order = (
            self.max_ar_order +
            self.max_seasonal_ar_order * self.seasonal_periods)
        self.max_reduced_ma_order = (
            self.max_ma_order +
            self.max_seasonal_ma_order * self.seasonal_periods)

        # Check that we don't have duplicate AR or MA lags from the seasonal
        # component
        ar_lags = set(self.ar_lags)
        seasonal_ar_lags = set(
            np.array(self.seasonal_ar_lags) * self.seasonal_periods)
        duplicate_ar_lags = ar_lags.intersection(seasonal_ar_lags)
        if validate_specification and len(duplicate_ar_lags) > 0:
            raise ValueError('Invalid model: autoregressive lag(s) %s are'
                             ' in both the seasonal and non-seasonal'
                             ' autoregressive components.' % duplicate_ar_lags)

        ma_lags = set(self.ma_lags)
        seasonal_ma_lags = set(
            np.array(self.seasonal_ma_lags) * self.seasonal_periods)
        duplicate_ma_lags = ma_lags.intersection(seasonal_ma_lags)
        if validate_specification and len(duplicate_ma_lags) > 0:
            raise ValueError('Invalid model: moving average lag(s) %s are'
                             ' in both the seasonal and non-seasonal'
                             ' moving average components.' % duplicate_ma_lags)

        # Handle trend
        self.trend = trend
        self.trend_poly, _ = prepare_trend_spec(trend)

        # Check for a constant column in the provided exog
        exog_is_pandas = _is_using_pandas(exog, None)
        if (validate_specification and exog is not None
                and len(self.trend_poly) > 0 and self.trend_poly[0] == 1):
            # Figure out if we have any constant columns
            x = np.asanyarray(exog)
            ptp0 = np.ptp(x, axis=0)
            col_is_const = ptp0 == 0
            nz_const = col_is_const & (x[0] != 0)
            col_const = nz_const

            # If we already have a constant column, raise an error
            if np.any(col_const):
                raise ValueError('A constant trend was included in the model'
                                 ' specification, but the `exog` data already'
                                 ' contains a column of constants.')

        # This contains the included exponents of the trend polynomial,
        # where e.g. the constant term has exponent 0, a linear trend has
        # exponent 1, etc.
        self.trend_terms = np.where(self.trend_poly == 1)[0]
        # Trend order is either the degree of the trend polynomial, if all
        # exponents are included, or a list of included exponents. Here we need
        # to make a distinction between a degree zero polynomial (i.e. a
        # constant) and the zero polynomial (i.e. not even a constant). The
        # former has `trend_order = 0`, while the latter has
        # `trend_order = None`.
        self.k_trend = len(self.trend_terms)
        if len(self.trend_terms) == 0:
            self.trend_order = None
            self.trend_degree = None
        elif np.all(self.trend_terms == np.arange(len(self.trend_terms))):
            self.trend_order = self.trend_terms[-1]
            self.trend_degree = self.trend_terms[-1]
        else:
            self.trend_order = self.trend_terms
            self.trend_degree = self.trend_terms[-1]

        # Handle endog / exog
        # Standardize exog
        self.k_exog, exog = prepare_exog(exog)

        # Standardize endog (including creating a faux endog if necessary)
        faux_endog = endog is None
        if endog is None:
            endog = [] if exog is None else np.zeros(len(exog)) * np.nan

        # Add trend data into exog
        nobs = len(endog) if exog is None else len(exog)
        if self.trend_order is not None:
            # Add in the data
            trend_data = self.construct_trend_data(nobs, trend_offset)
            if exog is None:
                exog = trend_data
            elif exog_is_pandas:
                trend_data = pd.DataFrame(trend_data,
                                          index=exog.index,
                                          columns=self.construct_trend_names())
                exog = pd.concat([trend_data, exog], axis=1)
            else:
                exog = np.c_[trend_data, exog]

        # Create an underlying time series model, to handle endog / exog,
        # especially validating shapes, retrieving names, and potentially
        # providing us with a time series index
        self._model = TimeSeriesModel(endog,
                                      exog=exog,
                                      dates=dates,
                                      freq=freq,
                                      missing=missing)
        self.endog = None if faux_endog else self._model.endog
        self.exog = self._model.exog

        # Validate endog shape
        if (validate_specification and not faux_endog and self.endog.ndim > 1
                and self.endog.shape[1] > 1):
            raise ValueError('SARIMAX models require univariate `endog`. Got'
                             ' shape %s.' % str(self.endog.shape))

        self._has_missing = (None if faux_endog else np.any(
            np.isnan(self.endog)))