Exemplo n.º 1
0
    def calculate_ndiffs(self, alpha=0.05, test="kpss", max_d=2):
        """
        Estimate the differencing order ``d`` for every group, for use in ARIMA ordering.

        Computing ``d`` once up front and supplying it as a fixed value, instead of
        having it searched for, can dramatically reduce the tuning time of ``pmdarima``
        models.

        :param alpha: Significance level against which the test p-value is compared
                      when deciding whether a candidate ``d`` is significant.

                      Default: ``0.05``
        :param test: Name of the statistical test used to decide stationarity.
                     Supported values: ``['kpss', 'adf', 'pp']``
                     See:

            https://alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.KPSSTest.html

            https://alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.PPTest.html

            https://alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.ADFTest.html

                     Default: ``'kpss'``
        :param max_d: Largest value of ``d`` that will be tried.

                      Default: ``2``
        :return: Dictionary of ``{<group_key>: <optimal 'd' value>}``
        """
        # Make sure the grouped representation exists before iterating over it.
        self._create_group_df()

        optimal_d_by_group = {}
        for group_key, frame in self._group_df:
            # Only the target column of each group is tested.
            optimal_d_by_group[group_key] = ndiffs(
                x=frame[self._y_col], alpha=alpha, test=test, max_d=max_d
            )
        return optimal_d_by_group
Exemplo n.º 2
0
def check_stationarity(dataframe, alpha=0.05):
    """
    Run the Augmented Dickey-Fuller test on every column of a dataframe and print a report.

    One row is printed per column: the column name, whether the ADF test considers
    the series stationary, the p-value of the test, and the differencing order
    (``0`` when the test says no differencing is needed).

    :param dataframe: dataframe whose columns are analysed with the ADF test.
    :param alpha: significance level used to reject the null hypothesis, applied both
                  to the ADF test and to the differencing-order estimate.

                  Default: ``0.05``
    """
    # Work on a copy so that the original time series is not modified
    data = dataframe.copy()
    print('\nResults of Dickey-Fuller Test:')
    print('{:<15}{:<15}{:<10}{:<10}'.format('Column', 'Stationary', 'P-value',
                                            'Order'))
    adf_test = ADFTest(alpha=alpha)
    for column in data.columns:
        series = data[column]
        # should_diff() returns the p-value and whether differencing is needed
        p_value, should = adf_test.should_diff(series)
        # ndiffs() defaults to the KPSS test; request ADF explicitly so the reported
        # order agrees with the stationarity decision printed on the same row.
        order = ndiffs(series, alpha=alpha, test='adf') if should else 0
        # A bool formatted with a width/alignment spec is rendered as 1/0, so it is
        # converted to text first to print True/False.
        stationary = str(not should)
        print(f'{column:<15}{stationary:<15}{p_value:<10.5f}{order:<10}')
Exemplo n.º 3
0
 def find_order(self, data):
     """
     Choose a non-seasonal ARIMA order for ``data`` and prepare an unfitted model.

     The differencing term is the larger of the KPSS and ADF estimates (each with
     ``alpha=0.05`` and ``max_d=6``). With that term fixed, a stepwise
     ``pm.auto_arima`` search picks the order, which is stored in ``self.order``;
     ``self.model`` is then set to a new ``pm.arima.ARIMA`` built with that order.

     :param data: the series handed to ``ndiffs`` and ``pm.auto_arima``.
     :return: ``self``
     """
     # Estimate the number of differences with both tests and keep the larger one.
     test_settings = dict(alpha=0.05, max_d=6)
     differencing = max(
         ndiffs(data, test='kpss', **test_settings),
         ndiffs(data, test='adf', **test_settings),
     )

     # Stepwise search for the order, with the differencing term held fixed.
     search_options = dict(
         d=differencing,
         seasonal=False,
         stepwise=True,
         suppress_warnings=True,
         error_action="ignore",
         max_order=None,
         trace=True,
         maxiter=20,
     )
     best_fit = pm.auto_arima(data, **search_options)
     self.order = best_fit.order

     self.model = pm.arima.ARIMA(order=self.order)
     return self
Exemplo n.º 4
0
    def fit_eval(self, data, validation_data, **config):
        """
        Fit the model on the training data and score it on the validation data.

        On the first call (``self.model_init`` is falsy) the differencing term ``d`` is
        taken as the larger of the KPSS and ADF estimates, the seasonal differencing
        term ``D`` is estimated with ``nsdiffs`` (``m=7``, ``max_D=12``) when
        ``self.seasonal`` is set and is ``0`` otherwise, both are written into
        ``config``, and the model is built from ``config``.

        :param data: A 1-D numpy array as the training data
        :param validation_data: A 1-D numpy array as the evaluation data
        :param config: keyword arguments forwarded to ``self._build``; any ``d`` / ``D``
                       entries are overwritten by the estimated values
        :return: a dict mapping ``self.metric`` to its value on ``validation_data``
        """
        if not self.model_init:
            # Estimate the differencing term (d) with both tests, KPSS first.
            d_estimates = [
                ndiffs(data, alpha=0.05, test=test_name, max_d=6)
                for test_name in ('kpss', 'adf')
            ]
            config['d'] = max(d_estimates)
            # The seasonal differencing term (D) is only estimated for seasonal models.
            config['D'] = nsdiffs(data, m=7, max_D=12) if self.seasonal else 0

            self._build(**config)
            self.model_init = True

        self.model.fit(data)

        scores = self.evaluate(x=None, target=validation_data, metrics=[self.metric])
        return {self.metric: scores[0].item()}
Exemplo n.º 5
0
        (1, 1),
        (2, 0),
        (2, 1)
    ]

    for lag, ax_coords in enumerate(ax_idcs, 1):
        ax_row, ax_col = ax_coords
        axis = axes[ax_row][ax_col]
        lag_plot(df['open'], lag=lag, ax=axis)
        axis.set_title(f"Lag={lag}")
        
    plt.show()



    kpss_diffs = ndiffs(y_train, alpha=0.05, test='kpss', max_d=6)
    adf_diffs = ndiffs(y_train, alpha=0.05, test='adf', max_d=6)
    n_diffs = max(adf_diffs, kpss_diffs)

    print(f"Estimated differencing term: {n_diffs}")

    auto = pm.auto_arima(y_train, d=n_diffs, seasonal=False, stepwise=True,
                     suppress_warnings=True, error_action="ignore", max_p=6,
                     max_order=None, trace=True)

    print(auto.order)

    from sklearn.metrics import mean_squared_error
    from pmdarima.metrics import smape

    model = auto
                   Path("data").glob("forecast_*.json")),
               ignore_index=True)
# Replace the raw "time" values with datetime objects, one row at a time, then
# order the frame chronologically.
# NOTE(review): presumably "time" holds epoch timestamps; fromtimestamp() is called
# without a tz argument, so the results are naive datetimes — confirm that is intended.
df["time"] = df.apply(lambda r: datetime.fromtimestamp(r["time"]), axis=1)
df = df.sort_values(by=["time"])

# Work on the temperature column only, filling missing readings with the column mean.
temperature = df["temperature"]
temperature = temperature.fillna(temperature.mean())

# All but 365 observations go to the training split.
# NOTE(review): assumes train_test_split keeps the series order (as pmdarima's does)
# — verify which train_test_split is imported.
train, test = train_test_split(temperature,
                               train_size=temperature.shape[0] - 365)

print(f"training size: {train.shape[0]}")
print(f"testing size: {test.shape[0]}")

# %%
# Estimate the differencing term with both the KPSS and the ADF test and keep the
# larger of the two estimates.
kpss_diffs = ndiffs(train, alpha=0.05, test="kpss", max_d=6)
adf_diffs = ndiffs(train, alpha=0.05, test="adf", max_d=6)
n_diffs = max(adf_diffs, kpss_diffs)

print(f"d: {n_diffs}")

# %%
model = auto_arima(
    train,
    d=n_diffs,
    seasonal=True,
    m=4,
    stepwise=True,
    suppress_warnings=True,
    max_p=6,
    trace=2,
from matplotlib import pyplot as plt

# Report the installed pmdarima version so the example output can be reproduced.
print("pmdarima version: %s" % pm.__version__)

# Load the data and split it into separate pieces
# (test_size=20 is passed to the split for the held-out portion).
y, X = load_date_example()
y_train, y_test, X_train, X_test = \
    model_selection.train_test_split(y, X, test_size=20)

# We can examine traits about the time series:
pm.tsdisplay(y_train, lag_max=10)

# We can see the ACF increases and decreases rather rapidly, which means we may
# need some differencing. There also does not appear to be an obvious seasonal
# trend.
# Only max_d is overridden here; the test type and significance level are left at
# ndiffs' defaults.
n_diffs = arima.ndiffs(y_train, max_d=5)

# Here's what the featurizer will create for us:
date_feat = preprocessing.DateFeaturizer(
    column_name="date",  # the name of the date feature in the exog matrix
    with_day_of_week=True,
    with_day_of_month=True)

# fit_transform returns a pair; the first element is discarded and only the
# transformed exogenous features are kept for display.
_, X_train_feats = date_feat.fit_transform(y_train, X_train)
print("Head of generated exog features:\n%s" % repr(X_train_feats.head()))

# We can plug this exog featurizer into a pipeline:
pipe = pipeline.Pipeline([
    ('date', date_feat),
    ('arima', arima.AutoARIMA(d=n_diffs,
                              trace=3,
# test['traffic'].plot(legend = True)

# Today's date (stamped on every forecast row) and the seven daily dates that
# follow the last index value of `traffic`, all formatted as YYYY-MM-DD strings.
date_today = date.today().strftime('%Y-%m-%d')
date_tomorrow = max(traffic.index) + timedelta(days=1)
date_next_7d = pd.date_range(
    start=date_tomorrow, periods=7,
    freq='D').strftime(date_format='%Y-%m-%d').tolist()
# `future_forecast` is rebound here: the existing forecast values become the
# 'traffic' column of a new DataFrame.
# NOTE(review): presumably the incoming `future_forecast` holds exactly 7 values to
# match `date_next_7d` — confirm against the code that produces it.
future_forecast = pd.DataFrame({
    'date': date_next_7d,
    'traffic': future_forecast
}).assign(date_of_forecast=date_today)
# Negative forecasts are replaced by 0 before the column is cast to integers.
future_forecast.loc[future_forecast['traffic'] < 0, 'traffic'] = 0
future_forecast['traffic'] = future_forecast['traffic'].astype(int)

# NOTE(review): the connection string embeds credentials (masked here); consider
# loading them from configuration or the environment instead of the source file.
engine = create_engine(
    "mysql://*****:*****@etcinsights.nazwa.pl/etcinsights_harebakken"
)
# Rows are appended to the existing table; index=True also writes the DataFrame
# index as a column.
future_forecast.to_sql('traffic_date_forecast_python',
                       con=engine,
                       if_exists='append',
                       index=True)
print('Data uploaded to db successfully!')

# NOTE(review): this import sits in the middle of the script; PEP 8 places imports
# at the top of the file.
from pmdarima.arima import ndiffs

# Estimate the differencing term with both the KPSS and the ADF test and keep the
# larger of the two estimates.
kpss_diffs = ndiffs(traffic, alpha=0.05, test='kpss', max_d=6)
adf_diffs = ndiffs(traffic, alpha=0.05, test='adf', max_d=6)
n_diffs = max(adf_diffs, kpss_diffs)

print(f"Estimated differencing term: {n_diffs}")
# Estimated differencing term: 1