示例#1
0
def forecast_test(df, account):
    """Back-test a Theta forecast for one account and save a comparison plot.

    The ``NetExchange`` column of ``df`` is split at 2019-12-01. A
    ``ThetaModel`` is fitted on the rows before the split and a 12-step
    forecast is plotted together with the training history (2010 onward)
    and the held-out observations. The figure is written to
    ``./figures_us/{account}.png`` and then closed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``NetExchange`` column. Assumed to have a date-like
        index, since the index is compared against a date string and sliced
        with ``.loc['2010':]`` -- TODO confirm against callers.
    account : str
        Account identifier; used as the plot title and output file name.

    Returns
    -------
    None
    """
    # Split on the function's own argument (the previous version read a
    # module-level ``sales_df`` and silently ignored ``df``).
    split_index = '2019-12-01'
    df_train = df[df.index < split_index]
    df_test = df[df.index >= split_index]
    endog = df_train.NetExchange
    actual = df_test.NetExchange

    # Only deseasonalize when at least 24 observations are available.
    mod = ThetaModel(endog, deseasonalize=(len(endog) >= 24))
    res = mod.fit(disp=0)
    fcast = res.forecast(12)

    fig, ax = plt.subplots()
    try:
        ax.set_title(account)
        ax.set_xlabel('Date')
        ax.set_ylabel('Net Exchange')

        # Forecast, training history and held-out actuals on the same axes.
        fcast.plot(ax=ax)
        endog.loc['2010':].plot(ax=ax)
        actual.plot(ax=ax)
        ax.legend()

        # Save figure for reference
        fig.savefig(f'./figures_us/{account}.png')
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)
示例#2
0
    def fit(self, y, **kwargs):
        """
        Fit the trend component in the boosting loop for an optimized theta model.

        Parameters
        ----------
        y : array-like
            Series to fit (presumably a NumPy array or pandas Series --
            confirm against caller). ``bias`` is removed with an augmented
            assignment, so a mutable array passed in may be modified in place.
        **kwargs : dict
            Stored on ``self.kwargs``. Must contain the key ``'bias'``, which
            is subtracted from ``y`` before the model is constructed.

        Returns
        -------
        object
            Whatever is stored in ``self.fitted``.

        Raises
        ------
        KeyError
            If ``'bias'`` is missing from ``kwargs``.

        """
        self.kwargs = kwargs
        bias = kwargs['bias']
        # Augmented assignment: for array inputs this may alter the caller's data.
        y -= bias
        # NOTE(review): ``bias`` is added to the ThetaModel object itself, not
        # to any fitted values. Unless ThetaModel supports ``+`` this line
        # raises TypeError -- confirm the intended expression.
        theta_model = ThetaModel(y, method="additive", period=1) + bias
        # NOTE(review): the fit result is assigned but never used below.
        fitted = theta_model.fit()
        # NOTE(review): this stores the model expression, not the fit result
        # or fitted values -- verify this is what downstream code expects.
        self.fitted = theta_model
        # The last element of ``self.fitted`` is kept as the model parameters.
        last_fitted_values = self.fitted[-1]
        self.model_params = last_fitted_values
        return self.fitted
示例#3
0
    def _theta_forecast(self, series):
        """Fit a deseasonalized Theta model and forecast ``len(series)`` steps.

        The seasonal period is read from ``self._analysis['theta_period']``.
        Deseasonalization is forced on (``use_test=False`` skips the
        seasonality test) and the forecast is produced with ``theta=20``.

        Parameters
        ----------
        series : array-like
            Observations to fit.

        Returns
        -------
        The forecast returned by the fitted model's ``forecast`` method.
        """
        seasonal_period = self._analysis['theta_period']
        horizon = len(series)

        theta_model = ThetaModel(
            series,
            period=seasonal_period,
            deseasonalize=True,
            use_test=False,
        )
        fitted = theta_model.fit()

        return fitted.forecast(steps=horizon, theta=20)
示例#4
0
def test_pi_width():
    """Prediction-interval widths must grow strictly with the horizon."""
    # GH 7075
    rng = np.random.RandomState(1233091)
    series = np.arange(100) + rng.standard_normal(100)

    fitted = ThetaModel(series, period=12, deseasonalize=False).fit()
    intervals = np.asarray(fitted.prediction_intervals(24))

    # Difference across the two interval columns gives one width per step.
    widths = np.squeeze(np.diff(intervals, axis=1))
    width_growth = np.diff(widths)
    assert np.all(width_growth > 0)
示例#5
0
def test_alt_index(indexed_data):
    """Fit and forecast on data carrying different kinds of index.

    When the index has no usable ``freq`` (attribute missing or ``None``) an
    explicit period of 12 is supplied. When the index has a ``freq``
    attribute that is ``None``, forecasting is expected to emit a
    ``UserWarning``; otherwise it must simply succeed.
    """
    index = indexed_data.index
    # A missing ``freq`` attribute and ``freq is None`` both mean "no frequency".
    lacks_freq = getattr(index, "freq", None) is None
    fitted = ThetaModel(indexed_data, period=12 if lacks_freq else None).fit()

    warns_on_forecast = hasattr(index, "freq") and index.freq is None
    if not warns_on_forecast:
        fitted.forecast_components(37)
        fitted.forecast(23)
        return

    with pytest.warns(UserWarning):
        fitted.forecast_components(37)
    with pytest.warns(UserWarning):
        fitted.forecast(23)
示例#6
0
def test_forecast_seasonal_alignment(data, period):
    """Forecast seasonal component must continue the in-sample seasonal cycle.

    The seasonal value at each out-of-sample position ``t`` is expected to
    equal the fitted seasonal factor at ``t % period``.
    """
    model = ThetaModel(
        data,
        period=period,
        deseasonalize=True,
        use_test=False,
        difference=False,
    )
    fitted = model.fit(use_mle=False)
    seasonal_factors = fitted._seasonal
    components = fitted.forecast_components(32)

    # Positions of the forecast steps, counted from the start of the sample.
    first_step = data.shape[0]
    positions = np.arange(first_step, first_step + components.shape[0])

    np.testing.assert_allclose(
        components.seasonal, seasonal_factors[positions % period]
    )
示例#7
0
def test_forecast_errors(data):
    """Invalid ``steps`` or ``theta`` arguments must raise ``ValueError``."""
    fitted = ThetaModel(data, period=12).fit()
    bad_steps = "steps must be a positive integer"
    bad_theta = "theta must be a float"

    # Negative horizon.
    with pytest.raises(ValueError, match=bad_steps):
        fitted.forecast(-1)

    # A theta of 0.99 is rejected.
    with pytest.raises(ValueError, match=bad_theta):
        fitted.forecast(7, theta=0.99)

    # Zero horizon for the component forecast.
    with pytest.raises(ValueError, match=bad_steps):
        fitted.forecast_components(0)
示例#8
0
    def _theta_forecast(self, series):
        """Forecast ``len(series)`` steps ahead with a seasonal Theta model.

        Uses the period stored in ``self._analysis['theta_period']``, always
        deseasonalizes (the seasonality test is turned off) and forecasts
        with ``theta=20``.
        """
        seasonal_period = self._analysis['theta_period']
        n_steps = len(series)

        # Disabled experiment: replace the end points of the series by a mean
        # over same-phase values, to avoid extreme cases (very noisy data)
        # where the forecast starts from a single outlying value.
        # series[0]   = series[::period].mean()
        # series[-1]  = series[::-period].mean()

        fitted = ThetaModel(
            series,
            period=seasonal_period,
            deseasonalize=True,
            use_test=False,
        ).fit()

        return fitted.forecast(steps=n_steps, theta=20)
示例#9
0
def test_no_freq():
    """A date index without a frequency and no ``period`` must be rejected."""
    all_days = pd.date_range("2000-1-1", periods=300)
    # Positions 0, 3, 4, 7, 8, ...: gaps alternate, so the spacing is irregular.
    positions = [2 * i + (i % 2) for i in range(100)]
    series = pd.Series(
        np.random.standard_normal(100), index=all_days[positions]
    )
    with pytest.raises(ValueError, match="You must specify a period or"):
        ThetaModel(series)
示例#10
0
def test_smoke(data, period, use_mle, deseasonalize, use_test, diff, model):
    """Smoke test: fit, summarize and forecast across option combinations."""
    # This test does not exercise a plain ndarray without an explicit period.
    if isinstance(data, np.ndarray) and period is None:
        return

    theta_model = ThetaModel(
        data,
        period=period,
        deseasonalize=deseasonalize,
        use_test=use_test,
        difference=diff,
        method=model,
    )
    res = theta_model.fit(use_mle=use_mle)

    summary_text = str(res.summary())
    assert "b0" in summary_text

    res.forecast(36)
    res.forecast_components(47)

    # ``use_test`` is only expected to remain set when deseasonalizing.
    expected_use_test = use_test and res.model.deseasonalize
    assert res.model.use_test is expected_use_test
    assert res.model.difference is diff
示例#11
0
    clamped_sales_df = raw_sales_df.append(end_clamp)
    clamped_sales_df.index = pd.to_datetime(clamped_sales_df.index)
    sales_df = clamped_sales_df.resample('M').sum().filter(['NetExchange'])

    if sales_df.NetExchange.sum() <= 0:
        print('No Data; forecast aborted')
        continue

    # Run example forecast
    # forecast_test(sales_df, account)

    # Prepare data for forecast
    endog = sales_df.NetExchange

    # Create and fit model, and forecast for 12 months
    mod = ThetaModel(endog, deseasonalize=(len(endog) >= 24))
    res = mod.fit(disp=0)
    fcast = res.forecast(15)

    # Plot forecast data
    try:
        res.plot_predict(
            15,
            alpha=0.2,
            in_sample=True,
        )
        plt.hlines(y=0,
                   xmin=dt.datetime.strptime('2010-01-01', '%Y-%M-%d'),
                   xmax=dt.datetime.strptime('2022-04-01', '%Y-%M-%d'))
        # endog['2016-01-01':].plot()
        plt.xlim((dt.datetime.strptime('2016-01-01', '%Y-%M-%d'),
示例#12
0
# clearly seasonal but does not have a clear trend during the same.

# Download the HOUST series from FRED for 1980-01-01 through 2020-04-01
# (``pdr`` is presumably pandas_datareader -- confirm against the imports).
reader = pdr.fred.FredReader(["HOUST"], start="1980-01-01", end="2020-04-01")
data = reader.read()
housing = data.HOUST
# Set the index frequency explicitly from the inferred one, so the model can
# pick up the period from the index.
housing.index.freq = housing.index.inferred_freq
ax = housing.plot()

# We specify the model without any options and fit it. The summary
# shows that the data was deseasonalized using the multiplicative method.
# The drift is modest and negative, and the smoothing parameter is fairly
# low.

from statsmodels.tsa.forecasting.theta import ThetaModel

tm = ThetaModel(housing)
res = tm.fit()
print(res.summary())

# The model is first and foremost a forecasting method.  Forecasts are
# produced using the `forecast` method of the fitted model. Below we produce
# a hedgehog plot by forecasting 2 years ahead every 2 years.
#
# **Note**: the default $\theta$ is 2.

# Map of label -> series: the full history plus one 24-step forecast per
# origin year.
forecasts = {"housing": housing}
for year in range(1995, 2020, 2):
    # Label-based slice: all observations up to and including ``year``.
    sub = housing[:str(year)]
    # NOTE: rebinding ``res`` here replaces the full-sample result above.
    res = ThetaModel(sub).fit()
    fcast = res.forecast(24)
    forecasts[str(year)] = fcast
示例#13
0
def get_stats():
    """Return the latest device count, a load rating and a short-term trend.

    Expects a JSON request body containing a ``'loc'`` key with a router
    name. All ``Count`` rows for that router are loaded in timestamp order
    and three values are derived:

    * ``num``   -- the ``devices`` value of the most recent row.
    * ``state`` -- ``'high'``, ``'medium'`` or ``'low'``, depending on
      whether the latest count is closest to the max, median or min of the
      counts since the last day of the previous month.
    * ``trend`` -- direction of a one-step Theta forecast fitted on the
      last hour of counts, relative to the latest count. The change is
      called "slightly" increasing/decreasing when its magnitude does not
      exceed the standard deviation of the longer history window.

    Returns
    -------
    dict
        ``{'num': ..., 'state': ..., 'trend': ...}``

    Aborts with HTTP 400 when the body is missing, is not a JSON object or
    lacks ``'loc'``, and with HTTP 404 when the router is unknown or has no
    recorded counts.
    """
    payload = request.json
    if not payload:
        abort(400)

    print(payload)

    if not isinstance(payload, dict) or 'loc' not in payload:
        abort(400)
    router_name = payload['loc']

    router = Router.query.filter_by(name=router_name).first()
    if router is None:
        abort(404)

    data = Count.query.filter_by(router_id=router.id).order_by(
        Count.timestamp).all()
    if not data:
        abort(404)

    # Get the last recorded amount of people
    last_count = data[-1].devices
    current_time = data[-1].timestamp

    # Historical high/medium/low rating, based on every count since the last
    # day of the previous month.
    threshold = current_time.replace(day=1) - timedelta(days=1)
    history = np.array(
        [x.devices for x in data if x.timestamp >= threshold])
    std = np.std(history)
    references = [np.max(history), np.median(history), np.min(history)]
    # argmin returns the first minimum, so ties resolve toward 'high'.
    ind = np.argmin([np.abs(last_count - ref) for ref in references])
    state = ('high', 'medium', 'low')[ind]

    # The trend is estimated from the last hour only.
    threshold = current_time - timedelta(hours=1)
    recent = np.array(
        [x.devices for x in data if x.timestamp >= threshold])

    # Predict upcoming trend
    try:
        model = ThetaModel(np.array(recent), period=10)
        model_fit = model.fit(disp=0)
        yhat = list(model_fit.forecast())[0]
        trend_val = int(yhat - last_count)
    except Exception:
        # Best-effort fallback (e.g. too few points for the model, or a NaN
        # forecast): compare the latest count with the recent mean.
        trend_val = int(recent[-1] - np.mean(recent))

    if trend_val > 0:
        trend = 'increasing' if trend_val > std else 'slightly increasing'
    elif trend_val < 0:
        trend = 'decreasing' if np.abs(trend_val) > std else 'slightly decreasing'
    else:
        trend = 'no change'

    res = {'num': last_count, 'state': state, 'trend': trend}

    return res