def generate_diff(self, lag=1, differences=1):
        """
        Generate the lagged differences of the target series for each group.

        To support invertibility, the first value of each group's series is returned
        alongside the differenced values.

        :param lag: Determines the magnitude of the lag to calculate the differencing function
                    for. Passed through unchanged to ``diff``.

                    Default: ``1``
        :param differences: The order of the differencing to be performed. Passed through
                            unchanged to ``diff``. Note that values > 1 will generate n fewer
                            results.

                            Default: ``1``
        :return: Dictionary of ``{<group_key>: {"series_start": <float>, "diff": <diff_array>}}``
        """
        self._create_group_df()
        group_diff_data = {}
        for group, df in self._group_df:
            # Work on the target column directly instead of resetting the frame's index in
            # place: the group frame is left unmodified, and there is no risk of
            # ``reset_index`` failing because the index name collides with a column.
            # NOTE(review): assumes ``diff`` treats ``x`` as a plain array and ignores the
            # pandas index (true for pmdarima.utils.diff) - confirm which ``diff`` is imported.
            series = df[self._y_col]
            group_diff_data[group] = {
                "diff": diff(x=series, lag=lag, differences=differences),
                # Positional access: the first row of the group regardless of index labels.
                "series_start": series.iloc[0],
            }
        return group_diff_data
示例#2
0
# Two example ARIMA (p, d, q) orders. Note that the second assignment overwrites the first,
# so only (1, 1, 3) remains bound to ``order`` after these lines run.
order = (1, 0, 12)  # p=1, d=0, q=12
order = (1, 1, 3)  # p=1, d=1, q=3
# The parameters p and q can be searched for iteratively with the auto_arima function, but the
# differencing term, d, requires a special set of stationarity tests to estimate.

#%%% Understanding differencing (d)
An integrative term, d, is typically only used in the case of non-stationary data. Stationarity in a time series indicates that a series’ statistical attributes, such as mean, variance, etc., are constant over time (i.e., it exhibits low heteroskedasticity).

A stationary time series is far easier to learn and forecast from. With the d parameter, you can force the ARIMA model to adjust for non-stationarity on its own, without having to worry about doing so manually.

The value of d determines the number of periods to lag the response prior to computing differences. E.g.,

from pmdarima.utils import c, diff

# Demonstrates how the ``lag`` and ``differences`` arguments of ``diff`` change the result
# for the same five-element input.

# lag 1, diff 1: each value minus the value immediately before it.
x = c(10, 4, 2, 9, 34)
diff(x, lag=1, differences=1)
# Returns: array([ -6.,  -2.,   7.,  25.], dtype=float32)
# Note: lag and differences are not the same!

# lag 1, diff 2: the lag-1 difference applied twice.
diff(x, lag=1, differences=2)
# Returns: array([ 4.,  9., 18.], dtype=float32)

# lag 2, diff 1: each value minus the value two positions before it.
diff(x, lag=2, differences=1)
# Returns: array([-8.,  5., 32.], dtype=float32)
# The lag corresponds to the offset in the time period lag, whereas the differences parameter
# is the number of times the differences are computed. Therefore, e.g., for differences=2,
# the procedure is essentially computing the difference twice:
x = c(10, 4, 2, 9, 34)
x  # step 1: the original series
# The two slices that are paired up for a lag-1 difference: everything but the first value,
# and everything but the last value.
x[1:], x[:-1]
x_lag = x[1:]  # first lag
x_lag
x[:-1]
示例#3
0
# Walk-through comparing pandas rolling/shift arithmetic with pmdarima's ``diff`` on a small
# daily sales series. Bare expressions are meant to be inspected interactively, cell by cell.
# NOTE(review): ``pd`` is not imported in this snippet; assumes pandas is imported as ``pd``
# elsewhere - confirm.

# Five consecutive daily timestamps starting on 2020-09-01.
dates = pd.date_range('2020-09-01',periods=5, freq='D')
dates
# Toy sales figures indexed by those dates.
sales = pd.Series([50,60,55,70,80], index=dates)
sales

#%%%
# Moving average over a window of 3 observations.
ma3 = sales.rolling(window=3).mean()
ma3
# Same window size, but with center=True each result is labelled at the center of its window.
ma3c = sales.rolling(window=3, center=True).mean()
ma3c
#%%%
# Shift the values down by one row, then subtract to get the day-over-day change.
sales.shift(1)
sales - sales.shift(1)

from pmdarima.utils import c, diff
# Lag-1 first difference via pmdarima; should match the pandas subtraction on the next line
# (the pandas version additionally carries a leading NaN for the first day).
diff(sales, lag=1, differences=1)
sales - sales.shift(1)
#%%%%%
# Lag-2 difference: each value minus the value two rows earlier.
diff(sales, lag=2, differences=1)
sales - sales.shift(2)
# lag is the gap between the compared observations: the 3rd is compared with the 1st,
# the 4th with the 2nd, and so on.
#%%%%%
# Reproduce differences=2 by hand on a copy so that ``sales`` itself stays untouched.
sales2 = sales.copy()
diff(sales, lag=1, differences=1)
sales2 - sales2.shift(1)

# differences=2 corresponds to applying the lag-1 difference twice:
diff(sales, lag=1, differences=2)
sales2 = sales2 - sales2.shift(1)  # first difference, stored back into sales2
sales2 - sales2.shift(1)  # difference of the first difference

diff(sales, lag=1, differences=2)
示例#4
0
# NOTE(review): ``sales`` is not defined in this snippet; it is expected to already exist in
# the session (presumably a pandas Series of daily sales - confirm). Likewise ``np`` is used
# below without a visible import; assumes numpy is imported as ``np`` elsewhere.
sales

#%%% Simple Moving Average (SMA)
# Mean over a rolling window of 3 observations.
ma3 = sales.rolling(window=3).mean()
ma3
# Same window size, but with center=True each result is labelled at the center of its window.
ma3c = sales.rolling(window=3, center=True).mean()
ma3c
#%%% : shift 1 down and then find the daily diff
sales.shift(1)
sales - sales.shift(1)  # daily changes

# Install with: pip install pmdarima --user  (then restart the session)
# https://pypi.org/project/pmdarima/
from pmdarima.utils import c, diff
diff(sales, lag=1, differences=1)  # diff from function
sales - sales.shift(1)  # same
#%%%%%
# Lag-2 difference: each value minus the value two rows earlier.
diff(sales, lag=2, differences=1)
# Stack the original series, the series shifted by 2, and their difference as three rows
# so they can be compared side by side.
np.vstack((sales, sales.shift(2), sales - sales.shift(2)))
sales - sales.shift(2)
# lag is the gap between the compared observations: the 3rd is compared with the 1st,
# the 4th with the 2nd, and so on.
#%%%%%
# Reproduce differences=2 by hand on a copy so that ``sales`` itself stays untouched.
sales2 = sales.copy()
diff(sales, lag=1, differences=1)
sales2 - sales2.shift(1)

# differences=2 corresponds to applying the lag-1 difference twice:
diff(sales, lag=1, differences=2)
sales2 = sales2 - sales2.shift(1)  # first difference, stored back into sales2
sales2 - sales2.shift(1)  # difference of the first difference