Exemplo n.º 1
0
    def plot_arima(self, df, label):
        """Plot a column, its rolling statistics and ARIMA predictions of each.

        Three ARIMA models are fitted with the last 20 observations held
        out: one on the raw column (order ``(2, 3, 2)``) and one each on its
        12-period rolling mean and rolling standard deviation (order
        ``(2, 3, 3)``).  Each model predicts the span between the 20th-last
        and the last index label of ``df``; the observed curves are drawn
        semi-transparent and the predictions opaque on a single figure.

        NOTE(review): the rolling models receive ``dates=df.index`` although
        the fitted slices are shorter than ``df`` -- confirm this is intended.

        :param df: indexed table holding the series (presumably a pandas
            DataFrame -- confirm against callers)
        :param label: key of the column in ``df`` to model; also used as the
            first legend entry
        """
        series = df[label]
        index = df.index
        start, end = index[-20], index[-1]

        raw_fit = ARIMA(series[:-20], order=(2, 3, 2)).fit()
        pred = raw_fit.predict(start, end, typ='levels')

        rolling_mean = series.rolling(window=12).mean().dropna()
        rolling_std = series.rolling(window=12).std().dropna()

        def fit_and_predict(statistic):
            # Same hold-out and prediction span as the raw-series model.
            fitted = ARIMA(statistic[:-20], order=(2, 3, 3),
                           dates=df.index).fit()
            return fitted.predict(start, end, typ='levels')

        pred_mean = fit_and_predict(rolling_mean)
        pred_std = fit_and_predict(rolling_std)

        plt.figure(figsize=(17, 8))

        # Observed curves (drawn first so the legend labels line up).
        plt.plot(series, alpha=0.5)
        plt.plot(rolling_mean, color='g', alpha=0.5)
        plt.plot(rolling_std, color='blue', alpha=0.5)

        # Predicted curves.
        plt.plot(pred)
        plt.plot(pred_mean, color='g')
        plt.plot(pred_std, color='b')

        plt.title('Rolling mean')
        plt.legend([label, 'mean', 'std', 'predicted'])
Exemplo n.º 2
0
def get_arima_model(
    values: Union[pd.Series, pd.DataFrame],
    arima_order: str,
    n_predict: int,
    seasonal: bool,
    ic: str,
) -> Tuple[List[float], Any]:
    """Get an ARIMA model for data

    Parameters
    ----------
    values : Union[pd.Series, pd.DataFrame]
        Data to fit
    arima_order : str
        String of ARIMA params in form "p,d,q".  When empty, the order is
        searched automatically with ``pmdarima.auto_arima``.
    n_predict : int
        Days to predict
    seasonal : bool
        Flag to use seasonal model (period of 5 observations)
    ic : str
        Information Criteria for model evaluation (only used by the
        automatic search)

    Returns
    -------
    List[float]
        List of the ``n_predict`` predicted values that follow the data
    Any
        Fit ARIMA model object.
    """
    if arima_order:
        order = tuple(int(term) for term in arima_order.split(","))
        model = ARIMA(values, order=order).fit()
        # Positions are zero-based, so the first out-of-sample step sits at
        # index ``n_obs``; starting one later would skip the next day.
        n_obs = len(values.values)
        l_predictions = list(
            model.predict(start=n_obs, end=n_obs + n_predict - 1)
        )
    else:
        # A seasonal search additionally needs the season length.
        seasonal_kwargs = {"m": 5} if seasonal else {}
        model = pmdarima.auto_arima(
            values.values,
            error_action="ignore",
            seasonal=bool(seasonal),
            # pmdarima's keyword is ``information_criterion``; the previous
            # spelling was swallowed by ``**fit_args`` instead of selecting
            # the criterion.
            information_criterion=ic,
            **seasonal_kwargs,
        )
        l_predictions = list(model.predict(n_predict))

    return l_predictions, model
Exemplo n.º 3
0
def q3_b():
    """Fit an ARIMA(0, 1, 1) to log WMT values and plot fit versus data.

    Loads the ``HW5_WMT`` sheet, parses the ``%Y%m%d`` index into datetimes,
    adds the 1-lag and 4-lag log differences as columns, keeps only the rows
    positioned before the ``'2016-03-31'`` label, prints the table and the
    model summary, and shows the in-sample prediction on top of the log
    series.
    """
    print("begin")
    df = get_data("data/HW5_WMT.xlsx", "HW5_WMT")
    df.index = pd.to_datetime(df.index, format='%Y%m%d')

    log_wmt = np.log(df['WMT'])
    df['first_difference'] = log_wmt - log_wmt.shift(1)
    df['season_difference'] = log_wmt - log_wmt.shift(4)

    # head(n) keeps the first n rows, so the cutoff row itself is excluded.
    cutoff = df.index.get_loc('2016-03-31')
    df = df.head(cutoff)
    print(df)

    truncated_log = np.log(df['WMT'])
    ARIMA_model = ARIMA(truncated_log, order=(0, 1, 1)).fit()  # p=0, d=1, q=1
    print(ARIMA_model.summary())

    ARIMA_model.predict().plot()
    truncated_log.plot()
    plt.show()
Exemplo n.º 4
0
 def arimamodel(self,
                train,
                test,
                ar_param,
                order,
                ma_param,
                iterative=True):
     """Predict ``len(test)`` values with ARIMA(ar_param, order, ma_param).

     With ``iterative`` set, the model is refitted before every step on the
     training data plus all test observations seen so far, and the first
     element of ``forecast()`` is collected; progress is printed at each
     tenth of the test set.  Otherwise the model is fitted once on ``train``
     and the whole test span is predicted in a single call.

     :param train: iterable of training observations
     :param test: indexable collection of test observations
     :param ar_param: autoregressive order (p)
     :param order: degree of differencing (d)
     :param ma_param: moving-average order (q)
     :param iterative: refit after every observed test value when true
     :return: list of per-step forecasts (iterative) or the object returned
         by ``predict`` (single fit)
     """
     pdq = (ar_param, order, ma_param)
     history = list(train)
     n_test = len(test)

     if not iterative:
         fitted = ARIMA(history, order=pdq).fit()
         return fitted.predict(start=len(history),
                               end=len(history) + n_test - 1)

     preds = []
     # Step indices at which a progress line is printed.
     stamps = [(n_test // 10) * tenth for tenth in range(1, 10)]
     for i in range(n_test):
         fitted = ARIMA(history, order=pdq).fit()
         preds.append(fitted.forecast()[0])
         # Reveal the true value before forecasting the next step.
         history.append(test[i])
         if i in stamps:
             progress = ': Arima Prediction {}0% Complete: {} out of {}'.format(
                 (stamps.index(i) + 1), i, n_test)
             print(str(datetime.now()) + progress)
     return preds
Exemplo n.º 5
0
def predict(coinDataset, daysSelected):
    """Forecast the ``close`` column ``daysSelected`` days past today.

    ``auto_arima`` (ADF test, tracing enabled) chooses the order, an ARIMA
    with that order is fitted on the whole ``close`` column, and positions
    ``len(coinDataset)`` through ``len(coinDataset) + daysSelected`` are
    predicted.  The result is re-indexed with calendar days starting at the
    current date.

    NOTE(review): the date index starts at "now" rather than at the day
    after the last row of ``coinDataset`` -- confirm the data is current.

    :param coinDataset: table with a ``close`` column
    :param daysSelected: number of days ahead to cover
    :return: the prediction, indexed by date
    """
    closing = coinDataset["close"]

    # Let auto_arima search for the (p, d, q) order.
    stepwise_fit = auto_arima(closing,
                              trace=True,
                              suppress_warnings=True,
                              test="adf")
    order = stepwise_fit.get_params().get("order")

    # Train on the entire dataset, since only future values are wanted.
    fitted = ARIMA(closing, order=order).fit()

    # Both ends of the range are included: daysSelected + 1 dates.
    today = dt.datetime.now()
    last_day = today + relativedelta(days=+daysSelected)
    indexFutureDates = pd.date_range(start=today.strftime("%Y-%m-%d"),
                                     end=last_day.strftime("%Y-%m-%d"))

    n_rows = len(coinDataset)
    prediction = fitted.predict(start=n_rows,
                                end=n_rows + daysSelected,
                                typ="levels")
    # Swap the positional index for dates so the series can be plotted.
    prediction.index = indexFutureDates

    return prediction
 def _predict_n_samples(self, instance, n_samples):
     """Fit ARIMA on ``instance`` and predict the next ``n_samples`` steps.

     A fresh model with order ``self._order_params`` is fitted on the given
     instance; prediction covers positions ``instance.shape[0]`` through
     ``instance.shape[0] + n_samples - 1``.
     """
     # Refit on the new instance, reusing only the stored order.
     fitted = ARIMA(instance, order=self._order_params).fit()
     first_step = instance.shape[0]
     last_step = first_step + n_samples - 1
     return fitted.predict(start=first_step, end=last_step)
Exemplo n.º 7
0
 def ARIMA_predict(self):
     '''
     Interactively pick an ARIMA order, fit on the train split and predict
     the test span.

     Would you like the model to look for an ARIMA order?:
     if user enters Y:
         model will look for best p,d,q order
         user must input Auto or Manual
         Auto uses auto_arima function
         Manual uses best_order function
         CAUTION: MANUAL IS VERY COMPUTATIONALLY EXPENSIVE (~20 minutes)
         Manual must be confirmed; without confirmation Auto is used
     if user enters N:
         user is asked if they would like to enter their own p,d,q
         if user enters Y:
             inputs for p,d, and q will follow
         if user enters N:
             model will use ARIMA p,d,q (4,1,1) as order

     Returns (res, atrain, atest, arima_title, a_pred): the fitted results,
     the train and test splits, a plot title containing the order and the
     test MSE, and the predictions over the test span.

     Raises ValueError when an answer is not one of the offered choices
     (or when p, d or q is not an integer).
     '''
     print('Would you like the model to look for an ARIMA order? (Y/N):')
     find_order = input().lower()
     if find_order == 'y':
         print('Auto or Manual?:')
         pdq_type = input().lower()
         if pdq_type == 'manual':
             print(
                 'CAUTION: MANUAL IS VERY COMPUTATIONALLY EXPENSIVE (~20 minutes) \nPlease enter "confirm" to proceed'
             )
             # .lower() must be called; comparing the bound method itself
             # to a string is always False.
             if input().lower() == 'confirm':
                 print('Please hold')
                 order = self.best_order()
             else:
                 # Not confirmed: use the cheap automatic search instead.
                 order = self.auto_pdq()
         elif pdq_type == 'auto':
             order = self.auto_pdq()
         else:
             raise ValueError(
                 f'expected "Auto" or "Manual", got {pdq_type!r}')
     elif find_order == 'n':
         print('Would you like to use a specific order? (Y/N)')
         enter_order = input().lower()
         if enter_order == 'y':
             ord_p = int(input('p:'))
             ord_d = int(input('d:'))
             ord_q = int(input('q:'))
             order = (ord_p, ord_d, ord_q)
         elif enter_order == 'n':
             order = (4, 1, 1)
         else:
             raise ValueError(f'expected "Y" or "N", got {enter_order!r}')
     else:
         raise ValueError(f'expected "Y" or "N", got {find_order!r}')

     atrain, atest = self.train_test()
     atest_s, atest_e = atest.index.date[0], atest.index.date[-1]
     res = ARIMA(atrain, order=order).fit()
     a_pred = res.predict(atest_s, atest_e)
     arima_title = f'ARIMA {order}         MSE={round(mean_squared_error(atest,a_pred),5)}'
     return res, atrain, atest, arima_title, a_pred
Exemplo n.º 8
0
    def ARIMA_predict(self, df, diff_type):
        '''
        ==Function==
        Decide the ARIMA order according to self.order_method (asking the
        user where needed), fit on the train split and predict the test span

        ==Parameters==
        |df| : data handed to self.train_test and, for the 'auto' and
            'manual' methods, to the order search
        |diff_type| : order of differencing applied to df
            'first' or 'second' (only read when order_method is
            'predetermined')

        ==Returns==
        res = .fit()
        atrain, atest = train and test set used for ARIMA
        arima_title = title to be used in a plot
        a_pred = predictions from ARIMA model
        order = order used in ARIMA

        ==Raises==
        ValueError for an unknown self.order_method or diff_type
        '''
        method = self.order_method.lower()
        if method == 'predetermined':
            predetermined = {'first': (3, 0, 0), 'second': (3, 0, 1)}
            if diff_type not in predetermined:
                raise ValueError(
                    f"diff_type must be 'first' or 'second', got {diff_type!r}")
            order = predetermined[diff_type]
        elif method == 'auto':
            order = self.auto_pdq(df)
        elif method == 'manual':
            print(
                'CAUTION: MANUAL IS VERY COMPUTATIONALLY EXPENSIVE (~20 minutes) \nPlease enter "confirm" to proceed'
            )
            confirmation = input()
            if confirmation.lower() == 'confirm':
                print('Please hold')
                order = self.best_order(df)
            else:
                print('Changing to Auto')
                order = self.auto_pdq(df)
        elif method == 'select':
            print('Please input each parameter')
            ord_p = int(input('p:'))
            ord_d = int(input('d:'))
            ord_q = int(input('q:'))
            order = (ord_p, ord_d, ord_q)
        else:
            raise ValueError(f'unknown order_method {self.order_method!r}')

        atrain, atest = self.train_test(df)
        atest_s, atest_e = atest.index.date[0], atest.index.date[-1]
        # Fit on the train split only: fitting on the full df would let the
        # model see the very observations the MSE below is scored on.
        res = ARIMA(atrain, order=order).fit()
        a_pred = res.predict(atest_s, atest_e)
        arima_title = f'ARIMA {order}         MSE={round(mean_squared_error(atest,a_pred),5)}'
        return res, atrain, atest, arima_title, a_pred, order
Exemplo n.º 9
0
def ARIMA_predict(df, order):
    """Fit ARIMA on the train split and plot its forecast over the test span.

    The figure shows the test data, the last 20 training points and the
    output of ``plot_predict`` between the first and last test dates.  The
    title reports both date ranges, the order and the MSE of the
    predictions against the test data, rounded to 5 places.

    :param df: data passed to ``train_test`` to obtain the two splits
    :param order: ARIMA ``(p, d, q)`` order
    """
    train, test = train_test(df)
    test_s, test_e = test.index.date[0], test.index.date[-1]
    train_s, train_e = train.index.date[0], train.index.date[-1]

    res = ARIMA(train, order=order).fit()

    fig, ax = plt.subplots(1, figsize=(14, 4))
    ax.plot(test.index, test)
    ax.plot(train.index[-20:], train[-20:])
    fig = res.plot_predict(test_s, test_e, ax=ax, plot_insample=True)

    mse = round(mean_squared_error(test, res.predict(test_s, test_e)), 5)
    title = 'Forecasted [{} - {}] Data \n Based On [{} - {}] Data\n ARIMA {} MSE= {}'.format(
        test_s, test_e, train_s, train_e, order, mse)
    plt.title(title)
    plt.show()
Exemplo n.º 10
0
def arima(l_args, s_ticker, df_stock):
    """Run the ``arima`` command: forecast the adjusted close and plot it.

    Parses ``l_args``, fits either a fixed-order ARIMA (``--order``) or a
    ``pmdarima.auto_arima`` model on ``df_stock["5. adjusted close"]``,
    predicts the requested number of market days, plots history plus
    forecast and prints the predicted values.  Any exception is printed
    rather than raised, so a failing command does not propagate.

    Parameters
    ----------
    l_args : list of str
        Command-line style arguments for this command
    s_ticker : str
        Ticker symbol, used in the plot title
    df_stock : pd.DataFrame
        Prices; must contain a "5. adjusted close" column
    """
    parser = argparse.ArgumentParser(
        prog="arima",
        description="""
            In statistics and econometrics, and in particular in time series analysis, an
            autoregressive integrated moving average (ARIMA) model is a generalization of an
            autoregressive moving average (ARMA) model. Both of these models are fitted to time
            series data either to better understand the data or to predict future points in the
            series (forecasting). ARIMA(p,d,q) where parameters p, d, and q are non-negative
            integers, p is the order (number of time lags) of the autoregressive model, d is the
            degree of differencing (the number of times the data have had past values subtracted),
            and q is the order of the moving-average model.
        """,
    )

    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i",
        "--ic",
        action="store",
        dest="s_ic",
        type=str,
        default="aic",
        choices=["aic", "aicc", "bic", "hqic", "oob"],
        help="information criteria.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store_true",
        default=False,
        dest="b_seasonal",
        help="Use weekly seasonal data.",
    )
    parser.add_argument(
        "-o",
        "--order",
        action="store",
        dest="s_order",
        type=str,
        help="arima model order (p,d,q) in format: pdq or p,d,q.",
    )
    parser.add_argument(
        "-r",
        "--results",
        action="store_true",
        dest="b_results",
        default=False,
        help="results about ARIMA summary flag.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        close = df_stock["5. adjusted close"]

        # Machine Learning model
        if ns_parser.s_order:
            # Accept the compact "pdq" form (one digit per term) as well as
            # the comma-separated "p,d,q" form.
            raw_order = ns_parser.s_order
            terms = raw_order.split(",") if "," in raw_order else list(raw_order)
            t_order = tuple(int(term) for term in terms)
            model = ARIMA(close.values, order=t_order).fit()
            # Positions are zero-based: the first out-of-sample step is at
            # index ``n_obs``; starting one later would skip the next day.
            n_obs = len(close)
            l_predictions = model.predict(
                start=n_obs,
                end=n_obs + ns_parser.n_days - 1,
            )
        else:
            if ns_parser.b_seasonal:
                seasonal_kwargs = {"seasonal": True, "m": 5}
            else:
                seasonal_kwargs = {"seasonal": False}
            model = pmdarima.auto_arima(
                close.values,
                error_action="ignore",
                # pmdarima's keyword is ``information_criterion``; the
                # previous spelling was swallowed by ``**fit_args``.
                information_criterion=ns_parser.s_ic,
                **seasonal_kwargs,
            )
            l_predictions = model.predict(n_periods=ns_parser.n_days)

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=close.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        if ns_parser.b_results:
            print(model.summary())
            print("")

        # Plotting
        plt.figure()
        plt.plot(df_stock.index, close, lw=2)
        if ns_parser.s_order:
            plt.title(
                f"ARIMA {str(t_order)} on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"ARIMA {model.order} on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        plt.xlim(
            df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1]
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The visibility flag is passed positionally: its keyword was
        # renamed from ``b`` to ``visible`` in newer matplotlib releases.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last observed and first predicted point.
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [close.values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(
            df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2
        )
        _, _, ymin, ymax = plt.axis()
        plt.vlines(
            df_stock.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k"
        )
        plt.ion()
        plt.show()

        # Print prediction data
        print_pretty_prediction(df_pred, close.values[-1])
        print("")

    except Exception as e:
        # Command boundary: report the problem instead of propagating it.
        print(e)
        print("")
Exemplo n.º 11
0
# Residual diagnostics: ACF in the upper panel, PACF in the lower panel of
# the existing figure `fig`, each with 40 lags.
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)

# Autocorrelations together with the Q statistics and their p-values
# (requested via qstat=True).
r, q, p = sm.tsa.acf(resid.values.squeeze(), fft=True, qstat=True)
# Lag 0 is dropped from `r`; the hard-coded 1..24 lag column assumes the
# remaining arrays each hold 24 entries -- TODO confirm against the default
# number of lags of the installed statsmodels.
data = np.c_[np.arange(1, 25), r[1:], q, p]

table = pd.DataFrame(data, columns=["lag", "AC", "Q", "Prob(>Q)"])
print(table.set_index("lag"))

# * This indicates a lack of fit.

# * In-sample dynamic prediction. How well does our model do?

# Dynamic prediction over "1990".."2012" with the previously fitted
# `arma_mod30` results object.
predict_sunspots = arma_mod30.predict("1990", "2012", dynamic=True)
print(predict_sunspots)


def mean_forecast_err(y, yhat):
    """Return the mean of ``y.sub(yhat)``, i.e. of actual minus predicted."""
    errors = y.sub(yhat)
    return errors.mean()


# Mean forecast error of the dynamic prediction against the observed
# SUNACTIVITY column.  The value is a bare expression, so it is only shown
# when run interactively (e.g. as the last expression of a notebook cell).
mean_forecast_err(dta.SUNACTIVITY, predict_sunspots)

# ### Exercise: Can you obtain a better fit for the Sunspots model? (Hint:
# sm.tsa.AR has a method select_order)

# ### Simulated ARMA(4,1): Model Identification is Difficult

from statsmodels.tsa.arima_process import ArmaProcess
Exemplo n.º 12
0
)
plt.xlabel('Date')
plt.ylabel('Nombre de vélos')
plt.title(
    'Nombre de vélos par jour entre 00h00 et 09h00 au totem d\'Albert 1er')
plt.legend()
plt.show()

# Use auto_arima to search for suitable ARIMA orders.  The summary must be
# called and printed; a bare `stepwise_fit.summary` does nothing in a script.
stepwise_fit = auto_arima(totem['Count'], trace=True, seasonal=True)
print(stepwise_fit.summary())

# Build the ARIMA model.
# NOTE(review): the order is hard-coded to (2, 1, 1) rather than read from
# `stepwise_fit` -- confirm it matches the search result.
model = ARIMA(totem['Count'], order=(2, 1, 1))
model = model.fit()
print(model.summary())

# The last index of the prediction corresponds to the day we want the
# forecast for.
# NOTE(review): the keyword is spelled `type` here; older statsmodels ARIMA
# results expect `typ` -- confirm against the installed version.
pred = model.predict(end=len(totem) + 1, type="levels").rename(
    'Prediction ARIMA'
)

# Plot the prediction curve.
pred.plot(legend=True)
plt.xlabel('Date')
plt.ylabel('Nombre de vélos')
plt.title(
    'Nombre de vélos par jour entre 00h00 et 09h00 au totem d\'Albert 1er')
plt.legend()
plt.show()
# Call tail(): printing the bound method only shows its repr, not the data.
print(pred.tail())
Exemplo n.º 13
0
class Nesterov(Approximator):
    r"""
    Implementation of the Nesterov method with a fixed \Delta parameter and
    predicted \gamma, k and l.

    Notation used in the comments below, per day d:
    C(d) -- new cases ('new sick'), D(d) -- new deaths ('new died'),
    R(d) -- new recoveries ('new reco'), T(d) -- cumulative cases ('sick'),
    S(d) -- running balance S(d - 1) + C(d) - D(d) - R(d) ('S').
    """
    _name = 'Модель Нестерова'
    _parameters = {
        'model': {
            'description': 'Модель предсказания: ARIMA',
            'type': 'choise',
            'values': ['ARIMA'],
            'default': 'ARIMA',
            'min': None,
            'max': None
        },
        'delta': {
            'description': 'Параметр задержки заболевения.'
            ' В диапазоне от 1 до 30',
            'type': 'continues',
            'values': [],
            'default': '14',
            'min': '1',
            'max': '30'
        }
    }

    def __init__(self, delta=14, model='ARIMA'):
        """
        :param delta: delay parameter in days; clamped to the min/max
            declared in ``_parameters['delta']``
        :param model: name of the prediction model; only 'ARIMA' is handled
            by ``fit``
        """
        super(Nesterov, self).__init__()

        lower = int(self._parameters['delta']['min'])
        upper = int(self._parameters['delta']['max'])
        self.delta = min(max(int(delta), lower), upper)

        # Fallback coefficients used when no per-day value is available.
        self.gamma = 1 / self.delta
        self.k_param = 0.0007
        self.l_param = 0.03

        self.model = model

    def calculate_S(self, date):
        """Return S(date); a missing previous day counts as S = 0."""
        # S(d) = S(d - 1) + C(d) - D(d) - R(d)
        return (self.dict_of_data.get(date + datetime.timedelta(days=-1),
                                      {'S': 0})['S'] +
                self.dict_of_data[date]['new sick'] -
                self.dict_of_data[date]['new died'] -
                self.dict_of_data[date]['new reco'])

    def calculate_gamma(self, key):
        """Return gamma(key).

        Raises KeyError when a required later day is absent from the data
        and ZeroDivisionError when the denominator is zero.
        """
        # gamma(d) = C(d + \delta) / (T(d + \delta - 1) - T(d - 1))
        delta = self.dict_of_data[key]['delta']
        return (self.dict_of_data[key +
                                  datetime.timedelta(days=delta)]['new sick'] /
                (self.dict_of_data[key + datetime.timedelta(days=delta - 1)]
                 ['sick'] - self.dict_of_data.get(
                     key + datetime.timedelta(days=-1), {'sick': 0})['sick']))

    def calculate_k_and_l(self, key):
        """Store k(key) and l(key); both are 0 when S(key - 1) is 0."""
        # k(d) = D(d) / S(d - 1)
        # l(d) = R(d) / S(d - 1)
        def calc(value):
            return (value / S_prev) if S_prev != 0 else 0

        S_prev = self.dict_of_data.get(key + datetime.timedelta(days=-1),
                                       {'S': 0})['S']
        self.dict_of_data[key]['k'] = calc(self.dict_of_data[key]['new died'])
        self.dict_of_data[key]['l'] = calc(self.dict_of_data[key]['new reco'])

    def fit(self, data):
        r"""
        Fit the model to the given data, i.e. tune all model parameters.
        It is assumed that every day is present in the sample.

        :param data: dictionary of the form
                key - object number,
                value - dictionary {'date': string formatted day.month.year,
                                    'sick': int,
                                    'recovered': int,
                                    'died': int}
        :type data: dict
        """

        self.dict_of_data = dict()

        for key in data:
            date = datetime.datetime.strptime(data[key]['date'],
                                              '%d.%m.%Y').date()
            if date not in self.dict_of_data:
                self.dict_of_data[date] = dict()
            self.dict_of_data[date]['new sick'] = data[key]['sick']
            self.dict_of_data[date]['new died'] = data[key]['died']
            self.dict_of_data[date]['new reco'] = data[key]['recovered']

        # The cumulative passes below read the previous day's entry, so the
        # dates must be visited chronologically whatever order `data` used.
        self.dict_of_data = dict(sorted(self.dict_of_data.items()))

        # TODO: missing days are not handled.

        # T(d) = T(d - 1) + C(d)
        for key in self.dict_of_data:
            self.dict_of_data[key]['sick'] = (
                self.dict_of_data.get(key - datetime.timedelta(days=1),
                                      {'sick': 0})['sick'] +
                self.dict_of_data[key]['new sick'])

        if self.model == 'ARIMA':
            for key in self.dict_of_data:
                self.dict_of_data[key]['delta'] = self.delta
                try:
                    self.dict_of_data[key]['gamma'] = self.calculate_gamma(key)
                except (KeyError, ZeroDivisionError):
                    # gamma needs data `delta` days ahead and a non-zero
                    # denominator; where unavailable it is filled from the
                    # fitted model in predict_params.
                    pass

                self.dict_of_data[key]['S'] = self.calculate_S(key)
                self.calculate_k_and_l(key)

            gammas = [
                self.dict_of_data[key]['gamma'] for key in self.dict_of_data
                if 'gamma' in self.dict_of_data[key]
            ]
            g_dates = [
                key.strftime('%Y-%m-%d') for key in self.dict_of_data
                if 'gamma' in self.dict_of_data[key]
            ]
            ds = [
                self.dict_of_data[key]['k'] for key in self.dict_of_data
                if 'k' in self.dict_of_data[key]
            ]
            ls = [
                self.dict_of_data[key]['l'] for key in self.dict_of_data
                if 'l' in self.dict_of_data[key]
            ]
            dl_dates = [
                key.strftime('%Y-%m-%d') for key in self.dict_of_data
                if 'k' in self.dict_of_data[key]
            ]

            # One ARIMA model per coefficient series.
            self.gamma_model = ARIMA(pd.Series(gammas, index=g_dates),
                                     order=(6, 0, 4),
                                     trend='n').fit()
            self.d_model = ARIMA(pd.Series(ds, index=dl_dates),
                                 order=(5, 1, 4),
                                 trend='n').fit()
            self.l_model = ARIMA(pd.Series(ls, index=dl_dates),
                                 order=(6, 1, 6),
                                 trend='n').fit()

            for key in self.dict_of_data:
                self.predict_params(key)

    def predict_params(self, date):
        """Fill any of 'gamma', 'k', 'l' missing for ``date`` from the models."""
        date_str = date.strftime('%Y-%m-%d')
        if 'gamma' not in self.dict_of_data[date]:
            self.dict_of_data[date]['gamma'] = \
                self.gamma_model.predict(start=date_str,
                                         end=date_str).values[0]
        if 'k' not in self.dict_of_data[date]:
            self.dict_of_data[date]['k'] = \
                self.d_model.predict(start=date_str,
                                     end=date_str).values[0]
        if 'l' not in self.dict_of_data[date]:
            self.dict_of_data[date]['l'] = \
                self.l_model.predict(start=date_str,
                                     end=date_str).values[0]

    def predict(self, date):
        r"""
        Return the prediction for the given date: the number of new cases,
        recoveries and deaths.  Days between the last known date and
        ``date`` are computed and stored along the way.

        :param date: string formatted "day.month.year"
        :type date: str

        :return: dictionary of the form
        {
            'date': string formatted day.month.year,
            'sick': int,
            'recovered': int,
            'died': int
        }
        :rtype: dict
        """
        date = datetime.datetime.strptime(date, '%d.%m.%Y').date()
        cur_date = max(self.dict_of_data) + datetime.timedelta(days=1)
        while cur_date <= date:
            self.dict_of_data[cur_date] = dict()
            self.predict_params(cur_date)
            self.dict_of_data[cur_date]['delta'] = self.delta

            # C(d) = gamma(d - \delta) * (T(d - 1) - T(d - \delta - 1))
            self.dict_of_data[cur_date]['new sick'] = int(
                self.dict_of_data.get(
                    cur_date - datetime.timedelta(
                        days=self.dict_of_data[cur_date]['delta']),
                    {'gamma': self.gamma})['gamma'] *
                (self.dict_of_data.get(cur_date - datetime.timedelta(days=1),
                                       {'sick': 0})['sick'] -
                 self.dict_of_data.get(
                     cur_date - datetime.timedelta(
                         days=self.dict_of_data[cur_date]['delta'] + 1),
                     {'sick': 0})['sick']))

            # D(d) = k(d) * S(d - 1)
            # The fallback must be a dict; a set literal cannot be indexed.
            self.dict_of_data[cur_date]['new died'] = int(
                self.dict_of_data.get(cur_date, {'k': self.k_param})['k'] *
                self.dict_of_data[cur_date + datetime.timedelta(days=-1)]['S'])
            # R(d) = l(d) * S(d - 1)
            self.dict_of_data[cur_date]['new reco'] = int(
                self.dict_of_data.get(cur_date, {'l': self.l_param})['l'] *
                self.dict_of_data[cur_date + datetime.timedelta(days=-1)]['S'])
            self.dict_of_data[cur_date]['S'] = self.calculate_S(cur_date)

            # T(d) = T(d - 1) + C(d)
            self.dict_of_data[cur_date]['sick'] = self.dict_of_data.get(
                cur_date - datetime.timedelta(days=1),
                {'sick': 0})['sick'] + self.dict_of_data[cur_date]['new sick']

            cur_date = cur_date + datetime.timedelta(days=1)

        return {
            'date': date.strftime('%d.%m.%Y'),
            'sick': self.dict_of_data[date]['new sick'],
            'recovered': self.dict_of_data[date]['new reco'],
            'died': self.dict_of_data[date]['new died']
        }

    def predict_between(self, date_from, date_to):
        r"""
        Return the predictions for every date between the two given dates,
        both included: the number of new cases, recoveries and deaths.

        :param date_from: first date, string formatted "day.month.year"
        :type date_from: str
        :param date_to: last date, string formatted "day.month.year"
        :type date_to: str

        :return: list of dictionaries of the form
        {
            'date': string formatted day.month.year,
            'sick': int,
            'recovered': int,
            'died': int
        }
        :rtype: list
        """
        date_from = datetime.datetime.strptime(date_from, '%d.%m.%Y')
        date_to = datetime.datetime.strptime(date_to, '%d.%m.%Y')

        cur_date = date_from

        list_of_ret = []
        while cur_date <= date_to:
            pred = self.predict(cur_date.strftime('%d.%m.%Y'))
            cur_date = cur_date + datetime.timedelta(days=1)

            list_of_ret.append(pred)

        return list_of_ret
Exemplo n.º 14
0
        ax = plt.gca()
    ax.plot(train, label='train')
    ax.plot(test, label='test')
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.plot(pred.index, pred.values, label='predicted', color='r')
    plt.legend()

p, d, q = 2, 1, 2
n = len(df)
# 90/10 chronological split.  The test set starts exactly where the train
# set ends, so no observation is dropped and `test` lines up with `pred`.
train = df[:n*9//10]
test = df[n*9//10:]

mod = ARIMA(train, order=(p, d, q))
mod = mod.fit()
# Predict every position of the test span (both ends included).
pred = mod.predict(start = len(train), end = len(df)-1)

plt.figure(figsize=(FIG_WIDTH, FIG_HEIGHT))
plot_forecasting(train, test, pred, x_label=x_label, y_label=y_label)
plt.savefig(graphsDir + 'Deaths - Train vs Test vs Predicted')


fig, axs = plt.subplots(1, 1, figsize=(FIG_WIDTH, FIG_HEIGHT))
fig.suptitle(f'ARIMA predictions (p={p},d={d},q={q})')


# PREDICT 2019
print(df)
# NOTE(review): the beginning of this explanation appears to be cut off in
# this excerpt.
# AR(1), the second prediction will follow the model, so that when using
# `ARIMA`, the prediction is
#
# $$
# Y_2 = \hat{\delta} + \hat{\rho} \left(Y_1 - \hat{\delta}\right)
# $$
#
# since `ARIMA` treats both exogenous and trend terms as regression with
# ARMA errors.
#
# This can be seen in the next set of cells.

# AR(1) with a constant trend, estimated with `ARIMA`.
arima_res = ARIMA(y, order=(1, 0, 0), trend="c").fit()
print_params(arima_res.summary())

# Predictions for positions 0 through 2 (bare expression: only displayed
# when run interactively).
arima_res.predict(0, 2)

# Recompute the second prediction by hand from the first two estimated
# parameters and the first observation, using the formula above.
delta_hat, rho_hat = arima_res.params[:2]
delta_hat + rho_hat * (y[0] - delta_hat)

# `SARIMAX` treats trend terms differently, and so the one-step forecast
# from a model estimated using `SARIMAX` is
#
# $$
# Y_2 = \hat\delta + \hat\rho Y_1
# $$

# The same specification, estimated with `SARIMAX` for comparison.
sarima_res = SARIMAX(y, order=(1, 0, 0), trend="c").fit()
print_params(sarima_res.summary())

sarima_res.predict(0, 2)
Exemplo n.º 16
0
def _draw_major_minor_grid():
    """Draw a solid major grid and a faint minor grid on the current axes."""
    # The on/off flag is passed positionally: its keyword was renamed from
    # `b` to `visible` across matplotlib releases, the position never changed.
    plt.grid(True, which="major", color="#666666", linestyle="-")
    plt.minorticks_on()
    plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)


def arima(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """
    ARIMA prediction

    Parses ``other_args``, fits either a fixed-order ARIMA (``-o p,d,q``) or
    an order selected by ``pmdarima.auto_arima``, then plots and prints the
    forecast of the "5. adjusted close" column for the next ``-d`` market
    days. When an end date is given (``-e``) the model is fitted only on data
    up to that date and the forecast is compared with the prices that
    actually followed (backtesting).

    Any exception raised along the way is caught and printed; the function
    always returns ``None``.

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        ticker, used in the plot titles
    df_stock: pd.DataFrame
        Dataframe of prices; must contain a "5. adjusted close" column and
        its index is compared against the parsed end date

    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="arima",
        description="""
            In statistics and econometrics, and in particular in time series analysis, an
            autoregressive integrated moving average (ARIMA) model is a generalization of an
            autoregressive moving average (ARMA) model. Both of these models are fitted to time
            series data either to better understand the data or to predict future points in the
            series (forecasting). ARIMA(p,d,q) where parameters p, d, and q are non-negative
            integers, p is the order (number of time lags) of the autoregressive model, d is the
            degree of differencing (the number of times the data have had past values subtracted),
            and q is the order of the moving-average model.
        """,
    )

    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i",
        "--ic",
        action="store",
        dest="s_ic",
        type=str,
        default="aic",
        choices=["aic", "aicc", "bic", "hqic", "oob"],
        help="information criteria.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store_true",
        default=False,
        dest="b_seasonal",
        help="Use weekly seasonal data.",
    )
    parser.add_argument(
        "-o",
        "--order",
        action="store",
        dest="s_order",
        type=str,
        help="arima model order (p,d,q) in format: p,d,q.",
    )
    parser.add_argument(
        "-r",
        "--results",
        action="store_true",
        dest="b_results",
        default=False,
        help="results about ARIMA summary flag.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        n_days = ns_parser.n_days

        # BACKTESTING
        if ns_parser.s_end_date:

            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            # Require at least 5 market days of training data on top of the
            # forecast horizon.
            if (ns_parser.s_end_date < get_next_stock_market_days(
                    last_stock_day=df_stock.index[0],
                    n_next_days=5 + n_days)[-1]):
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=n_days)

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Keep the real prices of the forecast window for comparison and
            # train only on data up to the end date.
            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        # Series being modelled (already truncated when backtesting).
        close = df_stock["5. adjusted close"]
        last_day = df_stock.index[-1]
        last_close = close.values[-1]

        # Machine Learning model
        if ns_parser.s_order:
            t_order = tuple(int(term) for term in ns_parser.s_order.split(","))
            model = ARIMA(close.values, order=t_order).fit()
            # Observation positions are zero-based, so the in-sample data
            # occupies 0..n_obs-1 and the first out-of-sample step is n_obs.
            # Starting at n_obs + 1 (as before) skipped the first forecast day.
            n_obs = len(close)
            l_predictions = model.predict(
                start=n_obs,
                end=n_obs + n_days - 1,
            )
        else:
            auto_arima_kwargs = {
                "error_action": "ignore",
                "seasonal": ns_parser.b_seasonal,
                # The pmdarima keyword is `information_criterion`; the former
                # `information_criteria` spelling was not the selection option.
                "information_criterion": ns_parser.s_ic,
            }
            if ns_parser.b_seasonal:
                # Weekly seasonality: 5 market days per period.
                auto_arima_kwargs["m"] = 5
            model = pmdarima.auto_arima(close.values, **auto_arima_kwargs)
            # Negative forecasts are clamped to zero.
            l_predictions = [
                i if i > 0 else 0
                for i in model.predict(n_periods=n_days)
            ]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=close.index[-1],
            n_next_days=n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        if ns_parser.b_results:
            print(model.summary())
            print("")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, close, lw=2)
        order_shown = t_order if ns_parser.s_order else model.order
        title_prefix = "BACKTESTING: " if ns_parser.s_end_date else ""
        plt.title(
            f"{title_prefix}ARIMA {order_shown} on {s_ticker}"
            f" - {n_days} days prediction"
        )
        plt.xlim(df_stock.index[0],
                 get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        _draw_major_minor_grid()
        # Dashed connector from the last known price to the first prediction.
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_close, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        # Shade the forecast window and mark where the known data ends.
        plt.axvspan(last_day,
                    df_pred.index[-1],
                    facecolor="tab:orange",
                    alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(last_day,
                   ymin,
                   ymax,
                   linewidth=1,
                   linestyle="--",
                   color="k")

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, df_future["5. adjusted close"].values[0]],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if ns_parser.s_end_date:
            real_values = df_future["5. adjusted close"].values
            zoom_end = df_pred.index[-1] + datetime.timedelta(days=1)

            # Upper panel: real prices versus predicted prices.
            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index,
                        df_future["5. adjusted close"],
                        c="tab:blue",
                        lw=3)
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, real_values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_day, df_pred.index[0]],
                [last_close, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(last_day, zoom_end)
            plt.xticks([last_day, zoom_end], visible=True)
            plt.ylabel("Share Price ($)")
            _draw_major_minor_grid()
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            # Lower panel: signed percentage error of the prediction.
            pct_error = 100 * (df_pred.values - real_values) / real_values
            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(df_future.index, pct_error, lw=2, c="red")
            plt.scatter(df_future.index, pct_error, c="red", lw=5)
            plt.title(
                "BACKTESTING: Error between Real data and Prediction [%]")
            # Connector from zero error at the last known day to the first
            # forecast error.
            plt.plot(
                [last_day, df_future.index[0]],
                [0, pct_error[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(last_day, zoom_end)
            plt.xticks([last_day, zoom_end], visible=True)
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            _draw_major_minor_grid()
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = df_future["5. adjusted close"]

            if gtff.USE_COLOR:

                patch_pandas_text_adjustment()

                print("Time         Real [$]  x  Prediction [$]")
                print(
                    df_pred.apply(price_prediction_backtesting_color,
                                  axis=1).to_string())
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values,
                                  df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, last_close)
        print("")

    except Exception as e:
        # Top-level command boundary: report the problem instead of crashing
        # the calling session.
        print(e, "\n")
Exemplo n.º 17
0
# Number of lags used consistently by the ACF/PACF plots and the Q-stat table.
N_LAGS = 40

# Residuals of the fitted model, plotted over time.
resid = arma_mod30.resid
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
ax = resid.plot(ax=ax)

# Normality test of the residuals (value is only displayed interactively).
stats.normaltest(resid)

# Q-Q plot of the residuals.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
fig = qqplot(resid, line='q', ax=ax, fit=True)

# Autocorrelation and partial autocorrelation of the residuals.
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=N_LAGS, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(resid, lags=N_LAGS, ax=ax2)
plt.show()

# Request the lag count explicitly: the default of `acf` is not 40 on every
# statsmodels release, and a hard-coded range(1, 41) then no longer matches
# the length of the returned arrays. The lag column is derived from the
# result so the three columns always line up.
r, q, p = sm.tsa.acf(resid.values.squeeze(), nlags=N_LAGS, fft=True,
                     qstat=True)
data = np.c_[np.arange(1, len(r)), r[1:], q, p]
table = pd.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])
print(table.set_index('lag'))

# Dynamic prediction from 1990 through 2012.
predict_sunspots = arma_mod30.predict('1990', '2012', dynamic=True)
print(predict_sunspots)


def mean_forecast_err(y, yhat):
    """Return the mean of the element-wise differences ``y - yhat``.

    ``y`` must expose a pandas-style ``sub`` method; the subtraction and the
    averaging are delegated to it, so index alignment and missing-value
    handling are whatever ``y.sub(yhat).mean()`` does for the given objects.
    """
    forecast_errors = y.sub(yhat)
    return forecast_errors.mean()


# Mean error of the dynamic sunspot predictions against the observed
# SUNACTIVITY series; as a bare expression the value is only displayed in an
# interactive session.
mean_forecast_err(dta.SUNACTIVITY, predict_sunspots)
Exemplo n.º 18
0
class ARIMAImplementation(ModelImplementation):
    """ARIMA model fitted on a Box-Cox transformed time series.

    The keyword parameters must contain ``p``, ``d`` and ``q`` (the ARIMA
    order). Non-positive series are shifted to positive values before the
    Box-Cox transformation; both operations are undone on the model output.
    """

    def __init__(self, log: Log = None, **params):
        super().__init__(log)
        self.params = params
        # Fitted model results, set by fit()
        self.arima = None
        # Box-Cox lambda estimated in fit()
        self.lambda_value = None
        # Shift added to make the series positive; None when no shift was needed
        self.scope = None
        # Length of the series passed to fit()
        self.actual_ts_len = None
        # Untransformed series passed to fit()
        self.sts = None

    def fit(self, input_data):
        """ Class fit arima model on data

        :param input_data: data with features, target and ids to process
        :return: fitted ARIMA results object (also stored in ``self.arima``)
        """

        source_ts = np.array(input_data.features)
        # Save actual time series length
        self.actual_ts_len = len(source_ts)
        self.sts = source_ts

        # Forget the shift of any previous fit: otherwise refitting on an
        # all-positive series would keep subtracting a stale offset in
        # _inverse_shift.
        self.scope = None

        # Box-Cox needs strictly positive values
        min_value = np.min(source_ts)
        if not min_value > 0:
            # Making a shift to positive values
            self.scope = abs(min_value) + 1
            source_ts = source_ts + self.scope

        _, self.lambda_value = stats.boxcox(source_ts)
        transformed_ts = boxcox(source_ts, self.lambda_value)

        # Set parameters
        order = tuple(int(self.params.get(name)) for name in ('p', 'd', 'q'))
        self.arima = ARIMA(transformed_ts, order=order).fit()

        return self.arima

    def predict(self, input_data, is_fit_pipeline_stage: bool):
        """ Method for time series prediction on forecast length

        :param input_data: data with features, target and ids to process
        :param is_fit_pipeline_stage: is this fit or predict stage for pipeline
        :return output_data: output data with smoothed time series
        """
        input_data = copy(input_data)
        parameters = input_data.task.task_params
        forecast_length = parameters.forecast_length
        old_idx = input_data.idx
        target = input_data.target

        # For training pipeline get fitted data
        if is_fit_pipeline_stage:
            fitted_values = self.arima.fittedvalues

            fitted_values = self._inverse_boxcox(predicted=fitted_values,
                                                 lambda_param=self.lambda_value)
            # Undo shift operation
            fitted_values = self._inverse_shift(fitted_values)

            diff = int(self.actual_ts_len - len(fitted_values))
            # If first elements skipped
            if diff != 0:
                # Pad the head with copies of the first fitted value so the
                # result is as long as the training series
                first_element = fitted_values[0]
                first_elements = [first_element] * diff
                first_elements.extend(list(fitted_values))

                fitted_values = np.array(first_elements)

            _, predict = _ts_to_table(idx=old_idx,
                                      time_series=fitted_values,
                                      window_size=forecast_length)

            new_idx, target_columns = _ts_to_table(idx=old_idx,
                                                   time_series=target,
                                                   window_size=forecast_length)

            # Update idx and target
            input_data.idx = new_idx
            input_data.target = target_columns

        # For predict stage we can make prediction
        else:
            # Predict the last `forecast_length` positions of the given idx
            start_id = old_idx[-1] - forecast_length + 1
            end_id = old_idx[-1]
            predicted = self.arima.predict(start=start_id,
                                           end=end_id)

            predicted = self._inverse_boxcox(predicted=predicted,
                                             lambda_param=self.lambda_value)

            # Undo shift operation
            predict = self._inverse_shift(predicted)
            # Convert one-dim array as a single row
            predict = np.array(predict).reshape(1, -1)
            new_idx = np.arange(start_id, end_id + 1)

            # Update idx
            input_data.idx = new_idx

        # Update idx and features
        output_data = self._convert_to_output(input_data,
                                              predict=predict,
                                              data_type=DataTypesEnum.table)
        return output_data

    def get_params(self):
        """ Return the keyword parameters the model was created with """
        return self.params

    def _inverse_boxcox(self, predicted, lambda_param):
        """ Method apply inverse Box-Cox transformation

        For a non-zero lambda, NaNs produced by the inverse transformation
        are filled by ``_filling_gaps``.
        """
        if lambda_param == 0:
            return np.exp(predicted)
        else:
            res = inv_boxcox(predicted, lambda_param)
            res = self._filling_gaps(res)
            return res

    def _inverse_shift(self, values):
        """ Method apply inverse shift operation """
        if self.scope is not None:
            values = values - self.scope

        return values

    @staticmethod
    def _filling_gaps(res):
        """ Fill NaN gaps of a one-dimensional array in place and return it

        Gaps at the first and last position are replaced with the mean of the
        valid values; remaining gaps are filled by linear interpolation
        between their valid neighbours.

        Previously every NaN was overwritten with -100.0 before the
        interpolation check, so that check could never find a NaN: interior
        gaps stayed at -100.0 and the sentinel also leaked into the mean used
        for the edges.

        :param res: one-dimensional float numpy array (assumed; the indexing
        below relies on it)
        :return: the same array with gaps filled
        """
        nan_mask = np.isnan(res)
        if not nan_mask.any():
            return res

        valid_mask = ~nan_mask
        if not valid_mask.any():
            # Nothing to derive a fill value from: keep the former behaviour
            # of marking every gap with the -100.0 sentinel
            res[nan_mask] = -100.0
            return res

        # Gaps in first and last elements fills with mean value
        mean_value = np.mean(res[valid_mask])
        last = len(res) - 1
        if nan_mask[0]:
            res[0] = mean_value
        if nan_mask[last]:
            res[last] = mean_value

        # Gaps in center of timeseries fills with linear interpolation
        remaining = np.isnan(res)
        if remaining.any():
            positions = np.arange(len(res))
            res[remaining] = np.interp(positions[remaining],
                                       positions[~remaining],
                                       res[~remaining])

        return res
Exemplo n.º 19
0
import pandas as pd
from matplotlib import pyplot
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import warnings

warnings.simplefilter('ignore', category=UserWarning)

dataset = "Paleo"

# `squeeze=True` is no longer passed: the option was removed from read_csv in
# pandas 2.0, and it never applied here because the loop below selects
# several columns, i.e. the result is a DataFrame.
series = pd.read_csv(f'{dataset}.csv',
                     header=0,
                     index_col=0,
                     parse_dates=True)

# Fit one ARIMA(1, 0, 1) per column in positions 11..16 and compare the
# in-sample predictions with the observed values.
for header in series.iloc[:, 11:17]:
    column = series[header]
    arima_model = ARIMA(column, order=(1, 0, 1)).fit()
    pred = arima_model.predict(dynamic=False)
    # Root of the MSE computed explicitly; the `squared=False` shortcut is
    # not available on every scikit-learn release.
    rmse = mean_squared_error(column, pred) ** 0.5

    print(arima_model.summary())
    print(f"RMSE = {rmse}")

    pyplot.plot(column)
    pyplot.plot(pred, color='red')
    pyplot.title(f'{header} intake over time ({dataset})')
    pyplot.xlabel('Date')
    pyplot.ylabel(f'{header}')
    pyplot.show()
        residuals.plot(kind='density', ax=ax[1]) # pandas series/dataframe function that plots each column separately
        plt.ylabel('Density', color='purple')
        plt.xlabel('Stock Price ($)', color='purple')
        plt.title('Residuals Density', color='purple')
        if save_images:
            plt.savefig(f'''images/{company}/{event}/{company} Stock Price Residuals, {event}, ARIMA Model (p, d, q) = {ARIMA_orders[j][i]}''', bbox_inches='tight')
            plt.clf()
        else:
            plt.show()

        # plot actual vs fitted(T_1) and forecasted(T_2) values over entire time frame (T_1 + T_2)
        # include confidence interval
        plt.plot(x1, y1)
        plt.plot(x1[-1:] + x2, y1[y1.shape[0] - 1:].append(y2), color='tab:orange')
        # fitted values (predictions)
        plt.plot(x1, model.predict(), color='tab:red')
        # plot forecasted values
        forecast = model.get_forecast(steps=y2.shape[0])  # class statsmodels.tsa.statespace.mlemodel.PredictionResults
        forecasted_values = forecast.predicted_mean # a pd.Series
        last_predict = model.predict(start=y1.shape[0] - 1) # a pd.Series
        plt.plot(x1[-1:] + x2, last_predict.append(pd.Series(forecasted_values)), color='tab:green')
        # plot 1-alpha% (let's use 95%) confidence interval
        alpha = 0.05
        confidence_interval = forecast.conf_int(alpha=alpha) # a pd.DataFrame
        confidence_interval_lower = confidence_interval['lower Close']
        confidence_interval_upper = confidence_interval['upper Close']
        # fills black with 10% opaqueness between lower bound and upper bound points
        plt.fill_between(x1[-1:] + x2, last_predict.append(confidence_interval_lower)
                         , last_predict.append(confidence_interval_upper)
                         , color='black', alpha=0.1)
        # customize x axis
Exemplo n.º 21
0
    ax.plot(train, label='train')
    ax.plot(test, label='test')
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.plot(pred.index, pred.values, label='predicted', color='r')
    plt.legend()


# ARIMA order (p, d, q) shared by every fit in this script.
p, d, q = 2, 1, 2

# Hold out the last ~10% of the observations for evaluation.
# The test set starts exactly where the training set ends: the previous
# `+ 1` offset silently dropped the first held-out observation even though
# the forecast below starts at position len(train).
n = len(df)
split = n * 9 // 10
train = df[:split]
test = df[split:]

mod = ARIMA(train, order=(p, d, q))
mod = mod.fit()
# Forecast every held-out position up to the last observation of df.
pred = mod.predict(start=len(train), end=len(df) - 1)

plt.figure(figsize=(FIG_WIDTH, FIG_HEIGHT))
plot_forecasting(train, test, pred, x_label=x_label, y_label=y_label)
plt.savefig(graphsDir + 'Covid19 - Train vs Test vs Predicted')

# Repeat the experiment with 50%, 60%, ..., 90% of the data used for training.
fig, axs = plt.subplots(5, 1, figsize=(FIG_WIDTH, 5 * FIG_HEIGHT))
fig.suptitle(f'ARIMA predictions (p={p},d={d},q={q})')
k = 0
for i in range(50, 100, 10):
    split = n * i // 100
    train = df[:split]
    test = df[split:]

    mod = ARIMA(train, order=(p, d, q))
    mod = mod.fit()
    pred = mod.predict(start=len(train), end=len(df) - 1)
Exemplo n.º 22
0
# Plot the predictions held in y_pred_out at this point (labelled as ARMA).
plt.plot(y_pred_out, color='green', label='ARMA Predictions')
plt.legend()

# NOTE(review): imports in the middle of the script (and repeated further
# down); they would normally live once at the top of the file.
import numpy as np
from sklearn.metrics import mean_squared_error

# RMSE of the current "Predictions" column against the observed test prices.
arma_rmse = np.sqrt(
    mean_squared_error(test["BTC-USD"].values, y_pred_df["Predictions"]))
print("ARMA RMSE: ", arma_rmse)

# Fit an ARIMA model of order (5, 4, 2) on y.
ARIMAmodel = ARIMA(y, order=(5, 4, 2))
ARIMAmodel = ARIMAmodel.fit()

# Forecast one step per test row; y_pred_df starts as the confidence-interval
# table of that forecast (alpha=0.05) and gains a "Predictions" column
# computed over the same index range.
y_pred = ARIMAmodel.get_forecast(len(test.index))
y_pred_df = y_pred.conf_int(alpha=0.05)
y_pred_df["Predictions"] = ARIMAmodel.predict(start=y_pred_df.index[0],
                                              end=y_pred_df.index[-1])
# Re-index onto the test index so predictions and test data share an x-axis.
y_pred_df.index = test.index
y_pred_out = y_pred_df["Predictions"]
plt.plot(y_pred_out, color='Yellow', label='ARIMA Predictions')
plt.legend()

import numpy as np
from sklearn.metrics import mean_squared_error

# NOTE(review): the name `arma_rmse` is reused for the ARIMA score, which
# overwrites the ARMA value computed above.
arma_rmse = np.sqrt(
    mean_squared_error(test["BTC-USD"].values, y_pred_df["Predictions"]))
print("ARIMA RMSE: ", arma_rmse)

# Fit a SARIMAX model with the same order plus a seasonal order of
# (2, 2, 2, 12).
SARIMAXmodel = SARIMAX(y, order=(5, 4, 2), seasonal_order=(2, 2, 2, 12))
SARIMAXmodel = SARIMAXmodel.fit()
Exemplo n.º 23
0
# Hold out the last 30 rows as the test set.
print(df.shape)
train = df.iloc[:-30]
test = df.iloc[-30:]
print("train and test shape:", train.shape, test.shape)

# Fit an ARIMA(2, 1, 2) on the training close prices.
model = ARIMA(train["close"], order=(2, 1, 2))
model = model.fit()
print("Model summary for training set:", model.summary())

# Predict over the positions covered by the test set so the prediction can be
# compared with the held-out data: positions len(train) .. len(df) - 1.
start = len(train)
end = len(train) + len(test) - 1
# The prediction's index is overwritten with the matching slice of df's index
# so that it lines up with the test set in the plot below.
prediction = model.predict(start=start, end=end,
                           typ="levels").rename("ARIMA Predictions")
prediction.index = df.index[start:end + 1]  # align with the held-out rows
# Plot predicted values against the test set.
plt.title("Prediction vs Testing Set")
test["close"].plot(legend=True)
prediction.plot(legend=True)
plt.show()

# Root mean squared error, printed after the mean test close for scale.
print(test["close"].mean())
rmse = sqrt(mean_squared_error(prediction, test["close"]))
print(rmse)

# Build a second model on the entire dataset, intended for future predictions.
model2 = ARIMA(df["close"],
               order=(2, 1, 2))  # the (p, d, q) order still needs tuning
Exemplo n.º 24
0
def _invert_differencing(diff_forecast, history, d):
    """Map a forecast made on a differenced series back to the original scale.

    Parameters
    ----------
    diff_forecast : pd.Series
        Forecast of the series after ``d`` rounds of differencing.
    history : pd.Series
        Original-scale observations ending immediately before the first
        forecast step.
    d : int
        Differencing order that was applied. Values above 2 are treated as 2
        because ``submit_ts`` never differences more than twice.

    Returns
    -------
    pd.Series
        Forecast on the original scale, indexed like ``diff_forecast``.
    """
    if d < 1:
        # Nothing was differenced, so the forecast is already in levels.
        return diff_forecast
    if d == 1:
        return history.iloc[-1] + diff_forecast.cumsum()
    # Second-order case: first rebuild the forecast of the first differences
    # from the last observed slope, then integrate once more to get levels.
    last_slope = history.iloc[-1] - history.iloc[-2]
    first_diff = last_slope + diff_forecast.cumsum()
    return history.iloc[-1] + first_diff.cumsum()


def submit_ts():
    """Forecast an uploaded CSV time series and render the result page.

    Reads the uploaded file ``userfile`` and these form fields:

    * ``query1`` - name of the date column (used as the group-by key).
    * ``query2`` - date format passed to ``pd.to_datetime``.
    * ``query3`` - integer; ``1`` runs the single-column comparison
      (ARIMA vs. Prophet vs. LSTM, best validation RMSE wins), a value
      greater than 1 runs the multi-column VARMAX forecast.
    * ``query4`` - target column; for the VARMAX path a whitespace-separated
      list of columns.
    * ``query5`` - ``'Yes'`` to keep only rows where column ``query6``
      equals ``query7``, ``'No'`` to use every row.
    * ``query8`` - number of future steps to forecast.

    Returns
    -------
    The rendered ``step1_img.html`` template, given the saved plot path, the
    forecast table as HTML and the submitted form values.

    Raises
    ------
    ValueError
        If ``query3`` is smaller than 1 or ``query5`` is neither ``'Yes'``
        nor ``'No'``. Previously these inputs failed later with an
        ``UnboundLocalError``.
    """
    f = request.files['userfile']
    # The filename is client-controlled: keep only its final path component
    # so the upload cannot be written outside the working directory.
    # NOTE(review): werkzeug.utils.secure_filename would sanitise further.
    upload_path = os.path.basename(f.filename)
    f.save(upload_path)
    print(f)

    s1 = request.form['query1']
    s2 = request.form['query2']
    s3 = int(request.form['query3'])
    s4 = request.form['query4']
    s5 = request.form['query5']
    # The filter fields are only required when filtering was requested.
    s6 = s7 = None
    if s5 == 'Yes':
        s6 = request.form['query6']
        s7 = request.form['query7']

    t = int(request.form['query8'])

    if s5 not in ('Yes', 'No'):
        raise ValueError(f"query5 must be 'Yes' or 'No', got {s5!r}")
    if s3 < 1:
        raise ValueError(f"query3 must be a positive integer, got {s3}")

    print(upload_path)
    d3 = pd.read_csv(upload_path)

    if s3 == 1:
        d3[s1] = pd.to_datetime(d3[s1], format=s2, infer_datetime_format=True)
        source = d3[d3[s6] == s7] if s5 == 'Yes' else d3
        datewise = source.groupby([s1]).agg({s4: 'sum'})

        # The last 5% of the dates are held out to score each model.
        datewise = datewise.astype('float32')
        split = int(datewise.shape[0] * 0.95)
        n11 = pd.infer_freq(datewise.index, warn=True)

        # ARIMA: pick the order on the training part, score it on the
        # hold-out, then refit that order on the full series to forecast.
        model_train = datewise.iloc[:split]
        valid = datewise.iloc[split:]
        model_arima = auto_arima(model_train[s4],
                                 trace=True,
                                 error_action='ignore',
                                 start_p=1,
                                 start_q=1,
                                 max_p=3,
                                 max_q=3,
                                 suppress_warnings=True,
                                 stepwise=False,
                                 seasonal=False)
        model_arima.fit(model_train[s4])
        prediction_arima = model_arima.predict(len(valid))
        arima_rmse = np.sqrt(
            mean_squared_error(list(valid[s4]), prediction_arima))
        print("Root Mean Square Error for ARIMA Model: ", arima_rmse)
        # list9 collects the validation RMSEs in the order
        # [ARIMA, Prophet, LSTM]; the position selects the winner below.
        list9 = [arima_rmse]
        print('\n')
        model = ARIMA(datewise[s4], order=model_arima.order)
        results = model.fit()
        forecast_arima = results.predict(len(datewise),
                                         len(datewise) + t - 1,
                                         typ='levels').rename(s4)

        # Prophet: same hold-out split on a ds/y frame.
        datewise1 = datewise.reset_index()
        datewise1.rename(columns={s1: 'ds', s4: 'y'}, inplace=True)
        train = datewise1.iloc[:split]
        valid = datewise1.iloc[split:]
        m = Prophet(weekly_seasonality=True)
        m.fit(train)
        future = m.make_future_dataframe(periods=len(valid), freq=n11)
        forecast = m.predict(future)
        predictions = forecast.tail(len(valid))['yhat']
        prophet_rmse = rmse(valid['y'], predictions)
        print('\n')
        print("Root Mean Squared Error for Prophet Model: ", prophet_rmse)
        print('\n')
        list9.append(prophet_rmse)
        m = Prophet(weekly_seasonality=True)
        m.fit(datewise1)
        future = m.make_future_dataframe(periods=t, freq=n11)
        forecast = m.predict(future)
        forecast_prophet = forecast[['ds', 'yhat']].tail(t).reset_index(
            drop=True)

        # LSTM: trained on scaled data with a look-back window as long as
        # the hold-out set.
        train = datewise.iloc[:split]
        # Copy so that adding the prediction column does not write onto a
        # slice of `datewise`.
        test = datewise.iloc[split:].copy()
        scaler = MinMaxScaler()
        scaler.fit(train)
        scaled_train = scaler.transform(train)

        n_input = len(test)
        n_features = 1
        generator = TimeseriesGenerator(scaled_train,
                                        scaled_train,
                                        length=n_input,
                                        batch_size=1)
        model = Sequential()
        model.add(
            LSTM(150, activation='relu', input_shape=(n_input, n_features)))
        model.add(Dense(1))
        model.compile(optimizer='adam', loss='mse')

        model.fit_generator(generator, epochs=30)

        # Roll the window forward one step at a time, feeding each
        # prediction back in as the newest observation.
        test_predictions = []
        current_batch = scaled_train[-n_input:].reshape(
            (1, n_input, n_features))
        for _ in range(len(test)):
            current_pred = model.predict(current_batch)[0]
            test_predictions.append(current_pred)
            current_batch = np.append(current_batch[:, 1:, :],
                                      [[current_pred]],
                                      axis=1)
        test['predictions'] = scaler.inverse_transform(test_predictions)
        lstm_rmse = rmse(test[s4], test['predictions'])
        list9.append(lstm_rmse)
        print('\n')
        print("Root Mean Square Error for LSTM Model: ", lstm_rmse)
        print('\n')

        # Continue training the same network on the full series, then
        # forecast t steps past its end.
        scaler.fit(datewise)
        scaled_full = scaler.transform(datewise)
        generator = TimeseriesGenerator(scaled_full,
                                        scaled_full,
                                        length=n_input,
                                        batch_size=1)
        model.fit_generator(generator, epochs=30)
        test_predictions = []
        current_batch = scaled_full[-n_input:].reshape(
            (1, n_input, n_features))
        for _ in range(t):
            current_pred = model.predict(current_batch)[0]
            test_predictions.append(current_pred)
            current_batch = np.append(current_batch[:, 1:, :],
                                      [[current_pred]],
                                      axis=1)
        # The LSTM forecast borrows its dates from the Prophet forecast, so
        # it only needs a positional index here.
        d_proj = pd.DataFrame(scaler.inverse_transform(test_predictions),
                              columns=[s4])
        lstm = pd.concat([forecast_prophet[['ds']], d_proj], axis=1)

        # Index of the first smallest RMSE (NaN scores never win).
        small = float('inf')
        for score in list9:
            if score < small:
                small = score
        no = list9.index(small)

        if no == 0:
            forecast_arima = pd.DataFrame(forecast_arima)
            forecast_arima.reset_index(drop=True, inplace=True)
            forecast_data = pd.concat(
                [forecast_prophet[['ds']], forecast_arima], axis=1)
            forecast_data.rename(columns={'ds': s1}, inplace=True)
        elif no == 1:
            forecast_data = forecast_prophet.rename(columns={
                'ds': s1,
                'yhat': s4
            })
        else:
            forecast_data = lstm.rename(columns={'ds': s1, 'yhat': s4})
        forecast_data1 = forecast_data.set_index(s1)

        fig, ax = plt.subplots(nrows=1, ncols=1)
        ax.plot(datewise[s4], label="Original Data")
        ax.plot(forecast_data1[s4], label="Forecasted Data")
        ax.legend()
        ax.set_xlabel("Date")
        ax.set_ylabel(s4)
        ax.set_title('forecasted data of ' + s4)
        plt.xticks(rotation=90)
    else:
        # VARMAX over several columns (s3 > 1).
        # NOTE(review): unlike the single-column path, the date column is
        # not converted with pd.to_datetime here - confirm that is intended.
        source = d3[d3[s6] == s7] if s5 == 'Yes' else d3
        n5 = s4.split()
        datewise = source.groupby([s1]).agg({n5[0]: 'sum'})
        for col in n5[1:]:
            d3i = source.groupby([s1]).agg({col: 'sum'})
            datewise = pd.concat([datewise, d3i], axis=1)

        # Fit a univariate ARIMA per column and take the largest p, d and q
        # found across columns as the shared order.
        orders = []
        for col in n5:
            model_arima = auto_arima(datewise[col],
                                     trace=True,
                                     error_action='ignore',
                                     start_p=1,
                                     start_q=1,
                                     max_p=3,
                                     max_q=3,
                                     suppress_warnings=True,
                                     stepwise=False,
                                     seasonal=False)
            orders.append(model_arima.order)
        p = max(order[0] for order in orders)
        d = max(order[1] for order in orders)
        q = max(order[2] for order in orders)

        if d < 1:
            df_transformed = datewise
        elif d == 1:
            df_transformed = datewise.diff().dropna()
        else:
            df_transformed = datewise.diff().diff().dropna()

        # Validation: hold out the last `nobs` rows and score the forecast
        # after mapping it back to the original scale.
        nobs = 12
        train = df_transformed[0:-nobs]
        model = VARMAX(train, order=(p, q), trend='c')
        results = model.fit(maxiter=100, disp=False)
        results.summary()
        df_forecast = results.forecast(nobs)
        for col in n5:
            restored = _invert_differencing(df_forecast[col],
                                            datewise[col].iloc[:-nobs], d)
            RMSE = rmse(datewise[col][-nobs:], restored)
            print('Root Mean Square Error for ' + col + ':', RMSE)

        # Final model on every row; the forecast continues from the last
        # observation of the full series.
        model = VARMAX(df_transformed, order=(p, q), trend='c')
        results = model.fit(maxiter=100, disp=False)
        results.summary()
        df_forecast = results.forecast(t)
        list98 = []
        for col in n5:
            out_col = col + ' Forecasted'
            df_forecast[out_col] = _invert_differencing(
                df_forecast[col], datewise[col], d)
            list98.append(out_col)
        df_forecast = df_forecast[list98].reset_index()
        df_forecast = df_forecast.rename(columns={'index': s1})
        df_forecast = df_forecast.set_index(s1)
        forecast_data1 = df_forecast

        # squeeze=False keeps the axes array two-dimensional even when only
        # one column is plotted.
        fig, b = plt.subplots(len(n5), 2, figsize=(15, 5), squeeze=False)
        for i, col in enumerate(n5):
            datewise[col].plot(kind='line', ax=b[i][0], title=col)
            df_forecast[list98[i]].plot(kind='line',
                                        ax=b[i][1],
                                        title='Forecasted data of ' + col,
                                        color='orange')
        fig.tight_layout(pad=1.0)

    plt.show()

    # Save the figure under a random prefix so that results of different
    # requests do not overwrite each other.
    n = str(randint(0, 1000000000000))
    full_filename = os.path.join(app.config["IMAGE_UPLOADS"],
                                 n + 'time_series.png')
    fig.savefig(full_filename)

    return render_template('step1_img.html',
                           user_image=full_filename,
                           tables=[forecast_data1.to_html(classes='page')],
                           titles=['na', 'Job'],
                           query1=request.form['query1'],
                           query2=request.form['query2'],
                           query3=request.form['query3'],
                           query4=request.form['query4'],
                           query5=request.form['query5'],
                           # Optional fields: absent when no filter is used.
                           query6=request.form.get('query6', ''),
                           query7=request.form.get('query7', ''),
                           query8=request.form['query8'])
Exemplo n.º 25
0
    # 1. Check distribution of residual errors
    arima.resid.plot(kind='kde', grid=False)
    plt.title('Residual Errors Distribution', size=17)
    plt.xlabel('Residual Errors', size=13)
    plt.ylabel('Density', size=13)
    plt.show()
    # 2. Check statistics
    print('[Basic statistics]')
    print(arima.resid.describe())

if query == '2':
    # The fitted model's predictions are used for both the plot and every
    # score, so compute them once.
    predictions = arima.predict()

    # 1. Overlay the predictions (one step ahead for every date) on the truth
    fig, ax = plt.subplots(figsize=(13, 7))
    plt.plot(daily.Date_x, daily.increased_confirmed,
             color='#33322B', ls=':', lw=3)
    plt.plot(daily.Date_x, predictions)
    title = 'ARIMA (one-step forecasting for every date) in ' + region
    plt.title(title, size=17)
    plt.xlabel('Date', size=13)
    plt.ylabel('Number of daily confirmed cases', size=13)
    # Keep roughly eight ticks; the step is clamped to 1 because a step of 0
    # (fewer than eight rows) would make the slice raise ValueError.
    tick_step = max(1, len(daily.Date_x) // 8)
    ax.set_xticks(ax.get_xticks()[::tick_step])
    plt.legend(['Truth', 'Prediction'], loc='upper left')
    plt.show()

    # 2. Check scores
    meae = metrics.median_absolute_error(
        daily.increased_confirmed, predictions)
    mae = metrics.mean_absolute_error(
        daily.increased_confirmed, predictions)
    # mean_squared_error returns the MSE; take the square root so that the
    # value stored in `rmse` really is the root mean squared error.
    rmse = metrics.mean_squared_error(
        daily.increased_confirmed, predictions) ** 0.5
Exemplo n.º 26
0
# Cross-validate the manual autoregressive estimator `m_full` over the
# pre-built `splits` and report the mean score rounded to three decimals.
cv_manual_ar = cross_val_score(estimator=m_full, X=X_full, y=y_full, cv=splits)
print(f'CV-Score (Manual AR): {round(cv_manual_ar.mean(),3)}')
'''AutoRegressive Model - Statsmodels (on data taking into account trend and seasonality)'''

# AutoReg with three lags, using X_season as exogenous regressors;
# predict() is called without arguments, i.e. with its default range.
ar_model = AutoReg(y_season, lags=3, exog=X_season).fit()
#print(ar_model.summary())
prediction_ar = ar_model.predict()
'''ARIMA Model - Statsmodels (on data taking into account trend and seasonality) - very slow!!'''

# Disabled variant: ARIMA(1, 0, 1) with the same exogenous regressors.
#arima_model = ARIMA(y_season, order=(1,0,1), exog=X_season).fit()
#print(arima_model.summary())
#prediction_arima = arima_model.predict()
'''ARIMA Model - only on remainder '''

# ARIMA(2, 0, 2) fitted on `remainder` with a daily frequency; the resulting
# prediction series is named so it can be joined as a feature column below.
arima_model = ARIMA(remainder, order=(2, 0, 2), freq='D').fit()
prediction_arima = arima_model.predict()
prediction_arima.name = 'Arima_lags'

# Use the ARIMA prediction (two AR lags, two MA terms) as an extra feature
# alongside X_season in a LinearRegression fitted against y_season.
X_arima = X_season.join(prediction_arima)
m_arima = LinearRegression()
m_arima.fit(X_arima, y_season)
# In-sample predictions of that regression, indexed like y_season.
outcome_arima = pd.Series(m_arima.predict(X_arima), index=y_season.index)
'''Plot data as residuals '''
#sns.lineplot(x=train.loc['2000'].index, y='temp', data=train.loc['2000'], label = 'true values')
#sns.lineplot(x=train.loc['2000'].index, y='full_model', data=train.loc['2000'], label = 'Manual AR')
# Residuals of the manual AR model ('temp' minus 'full_model') for the rows
# selected by train.loc['2000'].
sns.lineplot(x=train.loc['2000'].index,
             y=(train['temp'].loc['2000'] - train['full_model'].loc['2000']),
             label='Residuals Manual AR')
plt.show()
Exemplo n.º 27
0
def arima(l_args, s_ticker, s_interval, df_stock):
    """Fit an ARIMA model to the adjusted close and plot/print a forecast.

    Parameters
    ----------
    l_args : list of str
        Command-line style arguments: ``-d/--days``, ``-i/--ic``,
        ``-s/--seasonal``, ``-o/--order`` and ``-r/--results``.
    s_ticker : str
        Ticker symbol, used in the plot title.
    s_interval : str
        Not used by this function; kept so the signature stays unchanged.
    df_stock : pd.DataFrame
        Stock data with a ``'5. adjusted close'`` column; its index provides
        the dates for the plot and for computing the forecast dates.

    Returns
    -------
    None
        Results are plotted and printed. Errors are printed rather than
        raised, and unrecognised arguments end the call early.
    """
    parser = argparse.ArgumentParser(
        prog='arima',
        description="""In statistics and econometrics, and in particular in time
                                     series analysis, an autoregressive integrated moving average (ARIMA) model
                                     is a generalization of an autoregressive moving average (ARMA) model. Both
                                     of these models are fitted to time series data either to better understand
                                     the data or to predict future points in the series (forecasting).
                                     ARIMA(p,d,q) where parameters p, d, and q are non-negative integers, p is
                                     the order (number of time lags) of the autoregressive model, d is the degree
                                     of differencing (the number of times the data have had past values subtracted),
                                     and q is the order of the moving-average model."""
    )

    parser.add_argument('-d',
                        "--days",
                        action="store",
                        dest="n_days",
                        type=check_positive,
                        default=5,
                        help='prediction days.')
    parser.add_argument('-i',
                        "--ic",
                        action="store",
                        dest="s_ic",
                        type=str,
                        default='aic',
                        choices=['aic', 'aicc', 'bic', 'hqic', 'oob'],
                        help='information criteria.')
    parser.add_argument('-s',
                        "--seasonal",
                        action="store_true",
                        default=False,
                        dest="b_seasonal",
                        help='Use weekly seasonal data.')
    parser.add_argument('-o',
                        "--order",
                        action="store",
                        dest="s_order",
                        type=str,
                        help='arima model order (p,d,q) in format: pdq.')
    parser.add_argument('-r',
                        "--results",
                        action="store_true",
                        dest="b_results",
                        default=False,
                        help='results about ARIMA summary flag.')

    try:
        (ns_parser, l_unknown_args) = parser.parse_known_args(l_args)

        if l_unknown_args:
            print(
                f"The following args couldn't be interpreted: {l_unknown_args}\n"
            )
            return

        closes = df_stock['5. adjusted close']
        n_days = ns_parser.n_days

        # Machine Learning model
        if ns_parser.s_order:
            # The order arrives as three digits, e.g. "212" -> (2, 1, 2).
            t_order = tuple(int(digit) for digit in ns_parser.s_order)
            model = ARIMA(closes.values, order=t_order).fit()
            # Observations are zero-indexed, so the first out-of-sample step
            # is position len(closes); starting one later would drop the
            # next-day forecast and shift every value by a day.
            n_obs = len(closes)
            l_predictions = model.predict(start=n_obs,
                                          end=n_obs + n_days - 1)
            order_label = t_order
        else:
            # m=5 models a five-trading-day week; m=1 means no seasonality.
            # The keyword is `information_criterion`; the previous spelling
            # was not an auto_arima parameter, so the choice was not applied.
            model = pmdarima.auto_arima(
                closes.values,
                error_action='ignore',
                seasonal=ns_parser.b_seasonal,
                m=5 if ns_parser.b_seasonal else 1,
                information_criterion=ns_parser.s_ic)
            l_predictions = model.predict(n_periods=n_days)
            order_label = model.order

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=closes.index[-1],
            n_next_days=n_days)
        df_pred = pd.Series(l_predictions, index=l_pred_days, name='Price')

        if ns_parser.b_results:
            print(model.summary())
            print("")

        # Plotting
        last_day = df_stock.index[-1]
        plt.plot(df_stock.index, closes, lw=2)
        plt.title(
            f"ARIMA {order_label} on {s_ticker} - {n_days} days prediction"
        )
        plt.xlim(df_stock.index[0],
                 get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel('Time')
        plt.ylabel('Share Price ($)')
        # The on/off flag is passed positionally: its keyword was renamed
        # from `b` to `visible` in newer matplotlib releases.
        plt.grid(True, which='major', color='#666666', linestyle='-')
        plt.minorticks_on()
        plt.grid(True,
                 which='minor',
                 color='#999999',
                 linestyle='-',
                 alpha=0.2)
        # Dashed connector from the last observed price to the first forecast.
        plt.plot([last_day, df_pred.index[0]],
                 [closes.values[-1], df_pred.values[0]],
                 lw=1,
                 c='tab:green',
                 linestyle='--')
        plt.plot(df_pred.index, df_pred, lw=2, c='tab:green')
        # Shade the forecast window and mark where it begins.
        plt.axvspan(last_day,
                    df_pred.index[-1],
                    facecolor='tab:orange',
                    alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(last_day,
                   ymin,
                   ymax,
                   linewidth=1,
                   linestyle='--',
                   color='k')
        plt.show()

        # Print prediction data
        print("Predicted share price:")
        df_pred = df_pred.apply(lambda x: f"{x:.2f} $")
        print(df_pred.to_string())
        print("")

    except SystemExit:
        # argparse exits on -h/--help and on invalid option values; it has
        # already printed its own message, so just return to the caller.
        print("")
    except Exception as e:
        # Report the failure instead of hiding it, but keep the command
        # non-fatal for the caller.
        print(e)
        print("")