Пример #1
0
def main():
    """Run the yield-curve pipeline end to end.

    Reads 'LW_monthly.xlsx', hands every column but the first to
    NonLinearLeastSquares, fits the project's ARMA(1, 1) routine on the
    resulting parameter matrix, prints a reference fit from the ARIMA
    package for comparison, computes AIC/BIC and finally calls Forecast
    with a horizon of 12.
    """
    # Load the data; the column labels are passed on as the maturities.
    yields_df = pd.read_excel('LW_monthly.xlsx')
    yields_df.head()
    n_obs, n_cols = yields_df.shape
    maturities = list(yields_df.columns.values)

    # Drop the first column before estimation.
    yield_matrix = np.array(yields_df.values)[:, 1:]

    theta_opt = NonLinearLeastSquares(maturities, yield_matrix, n_obs, n_cols)

    # Average of column 3 of the optimised parameters (the lambda column).
    lambda_mean = np.average(theta_opt[:, 3])

    ar_order, ma_order = 1, 1
    beta_index = 2  # the beta which is regressed with ARMA(P, Q)

    arma_result = ARMA_Model(beta_index, theta_opt, ar_order, ma_order)
    (_pred, _resid, sigma_std, ar_coef, ma_coef, log_lik) = arma_result

    # Compare against the package implementation on the same beta column.
    reference_fit = ARIMA(theta_opt[:, beta_index - 1],
                          order=(ar_order, 0, ma_order)).fit()
    print(reference_fit.summary())

    (aic, bic) = Model_AICBIC(sigma_std, ar_order, ma_order, n_obs, log_lik)

    # Forecast horizon: T + 12
    horizon = 12
    Forecast(n_obs, sigma_std, beta_index, theta_opt, ar_coef, ma_coef,
             horizon, ma_order, ar_order)
Пример #2
0
def train_model(series, pdq, verbose=False):
    """Fit an ARIMA model of order ``pdq`` on ``series.values``.

    :param series: object exposing ``.values`` with the observations
    :param pdq: (p, d, q) order tuple forwarded to ``ARIMA``
    :param verbose: when truthy, print the fitted model's summary
    :return: the fitted model results
    """
    fitted = ARIMA(series.values, order=pdq).fit()
    if not verbose:
        return fitted
    print(fitted.summary())
    return fitted
Пример #3
0
def arima_model(serie, order, model_report=True, get_residuals=True):
    """
    Fit an ARIMA model on a series and optionally return its residuals.

    Parameters
    ----------
    serie : pd.Series
        The selected series.
    order : tuple
        The order of the ARIMA model.
    model_report : bool, optional
        True to print the model summary, False otherwise.
    get_residuals : bool, optional
        True to also return the residual series, False otherwise.

    Returns
    -------
    model or tuple (model, pd.Series)
        The fitted model alone when ``get_residuals`` is False, otherwise
        the fitted model together with the residual series named 'res'.

    """
    fitted = ARIMA(serie, order=order).fit()

    if model_report:
        print(fitted.summary())

    if not get_residuals:
        return fitted

    # Wrap the residuals in a frame so the default column 0 can be renamed.
    resid_frame = pd.DataFrame(fitted.resid)
    resid_frame = resid_frame.rename({0: 'res'}, axis=1)
    return fitted, resid_frame['res']
Пример #4
0
def test_ARIMA():
    """Smoke-test: fit ARIMA(1, 1, 0) on the 'CS-Aaa-3MO' column and print the summary."""
    from statsmodels.tsa.arima.model import ARIMA

    frame = get_ytw_test()

    arima_spec = ARIMA(endog=frame['CS-Aaa-3MO'],
                       exog=None,
                       order=(1, 1, 0),
                       trend=None)
    result = arima_spec.fit()
    print(result.summary())
Пример #5
0
def q3_b():
    """Fit ARIMA(0, 1, 1) on log WMT values before 2016-03-31 and plot fit vs. data.

    Loads the "HW5_WMT" sheet, adds log first/fourth differences as columns,
    keeps only the rows preceding the position of '2016-03-31', prints the
    frame and the model summary, then plots the in-sample predictions
    together with the log series.
    """
    print("begin")
    frame = get_data("data/HW5_WMT.xlsx", "HW5_WMT")
    frame.index = pd.to_datetime(frame.index, format='%Y%m%d')

    # Log differences at lag 1 and lag 4.
    log_full = np.log(frame['WMT'])
    frame['first_difference'] = log_full - log_full.shift(1)
    frame['season_difference'] = log_full - log_full.shift(4)

    # head(n) with n = position of the cut-off date excludes that date itself.
    cutoff_pos = frame.index.get_loc('2016-03-31')
    frame = frame.head(cutoff_pos)
    print(frame)

    fitted = ARIMA(np.log(frame['WMT']), order=(0, 1, 1)).fit()  # p=0, d=1, q=1
    print(fitted.summary())

    fitted.predict().plot()
    np.log(frame['WMT']).plot()
    plt.show()
Пример #6
0
def arima_model(vEndog, mExog=None, tPDQ=None):
    """
    Fits an ARIMA model. Order can be specified or determined by auto_arima.
    Differently from other models, it does not work on patsy/R formula syntax.

    After fitting, prints the model summary and some reading hints, shows the
    diagnostic plots and prints the p-value of a one-sample t-test that the
    residual mean is zero.

    :param vEndog: DataFrame column/numpy vector containing endogenous data (which will be regressed upon itself)
    :param mExog: vector/matrix containing exogenous data. Defaults to None
    :param tPDQ: tuple (p, d, q) containing order of the model;
        p: number of autoregressions (AR)
        d: number of differentiations (I)
        q: number of past prevision errors/moving averages (MA)
        If None (default), performs an auto_arima()

    :return mod: fitted model instance
    """

    ## Creating model
    if tPDQ is not None:
        # Order is specified. exog=None is ARIMA's default, so a single call
        # covers both the with- and without-exogenous cases.
        mod_arima = ARIMA(endog=vEndog, exog=mExog, order=tPDQ).fit(cov_type='robust')
    else:
        # Order isn't specified: let auto_arima() select it, then refit with
        # robust covariance. The exogenous data must be passed to the refit as
        # well, otherwise the final model would be estimated without it.
        mod_arima = auto_arima(y=vEndog, X=mExog)
        mod_arima = mod_arima.fit(y=vEndog, X=mExog, cov_type='robust')

    ## Printing summary and diagnostics
    print(mod_arima.summary())

    print("For heteroskdasticity, check Prob(H), where H0: homoskedasticity, and the standardized residual graph.")
    print("If there is hetero., the model error can't be a white noise (which is the desired thing).")
    print("Estimaed Density and Jarque-Bera have information on normality.")
    print("In the correlogram, all lollipops must be inside of the shaded area.")

    # Plots
    mod_arima.plot_diagnostics(figsize=(10, 10))
    plt.show()

    # Residual means. The two fitting paths return different objects: one
    # exposes the residuals as a `resid` attribute, the other as a `resid()`
    # method, so call it only when it is callable.
    vResid = mod_arima.resid
    if callable(vResid):
        vResid = vResid()
    tMean0 = stats.ttest_1samp(vResid, 0, nan_policy='omit')
    print(f"P-value for the test that residual mean is equal to 0: {np.around(tMean0[1], 5)}.")
    print("If p < 0.05, H0 is rejected and the residual mean is different from 0 (not ideal).")

    ## Returning
    return mod_arima
Пример #7
0
def arima(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """
    ARIMA prediction

    Fits either a fixed-order ARIMA (``--order``) or an automatically selected
    one (``pmdarima.auto_arima``) on the "5. adjusted close" column, then plots
    and prints the forecast. With ``--end`` the series is cut at that date and
    the forecast is compared against the data that follows it (backtesting).
    Any exception is caught and printed instead of being raised.

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        ticker, used in the plot titles
    df_stock: pd.DataFrame
        Dataframe of prices; must hold a "5. adjusted close" column and an
        index comparable with dates

    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="arima",
        description="""
            In statistics and econometrics, and in particular in time series analysis, an
            autoregressive integrated moving average (ARIMA) model is a generalization of an
            autoregressive moving average (ARMA) model. Both of these models are fitted to time
            series data either to better understand the data or to predict future points in the
            series (forecasting). ARIMA(p,d,q) where parameters p, d, and q are non-negative
            integers, p is the order (number of time lags) of the autoregressive model, d is the
            degree of differencing (the number of times the data have had past values subtracted),
            and q is the order of the moving-average model.
        """,
    )

    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i",
        "--ic",
        action="store",
        dest="s_ic",
        type=str,
        default="aic",
        choices=["aic", "aicc", "bic", "hqic", "oob"],
        help="information criteria.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store_true",
        default=False,
        dest="b_seasonal",
        help="Use weekly seasonal data.",
    )
    parser.add_argument(
        "-o",
        "--order",
        action="store",
        dest="s_order",
        type=str,
        help="arima model order (p,d,q) in format: p,d,q.",
    )
    parser.add_argument(
        "-r",
        "--results",
        action="store_true",
        dest="b_results",
        default=False,
        help="results about ARIMA summary flag.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        # BACKTESTING: validate the end date, then split the data into the
        # part used for training and the part the forecast is compared with.
        if ns_parser.s_end_date:

            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            if (ns_parser.s_end_date < get_next_stock_market_days(
                    last_stock_day=df_stock.index[0],
                    n_next_days=5 + ns_parser.n_days)[-1]):
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=ns_parser.n_days)

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        # Machine Learning model
        if ns_parser.s_order:
            t_order = tuple(
                int(s_term) for s_term in ns_parser.s_order.split(","))
            model = ARIMA(df_stock["5. adjusted close"].values,
                          order=t_order).fit()
            # Observations are zero-indexed, so the first out-of-sample step
            # sits at index n_obs; starting one later would skip tomorrow's
            # forecast and misalign the values with l_pred_days below.
            n_obs = len(df_stock["5. adjusted close"])
            l_predictions = model.predict(
                start=n_obs,
                end=n_obs + ns_parser.n_days - 1,
            )
        else:
            # pmdarima names this keyword `information_criterion`; under any
            # other name the --ic choice would not drive model selection.
            if ns_parser.b_seasonal:
                model = pmdarima.auto_arima(
                    df_stock["5. adjusted close"].values,
                    error_action="ignore",
                    seasonal=True,
                    m=5,
                    information_criterion=ns_parser.s_ic,
                )
            else:
                model = pmdarima.auto_arima(
                    df_stock["5. adjusted close"].values,
                    error_action="ignore",
                    seasonal=False,
                    information_criterion=ns_parser.s_ic,
                )
            # Negative price forecasts are clamped to zero.
            l_predictions = [
                i if i > 0 else 0
                for i in model.predict(n_periods=ns_parser.n_days)
            ]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=df_stock["5. adjusted close"].index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        if ns_parser.b_results:
            print(model.summary())
            print("")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, df_stock["5. adjusted close"], lw=2)
        if ns_parser.s_order:
            # BACKTESTING
            if ns_parser.s_end_date:
                plt.title(
                    f"BACKTESTING: ARIMA {str(t_order)} on {s_ticker} - {ns_parser.n_days} days prediction"
                )
            else:
                plt.title(
                    f"ARIMA {str(t_order)} on {s_ticker} - {ns_parser.n_days} days prediction"
                )
        else:
            # BACKTESTING
            if ns_parser.s_end_date:
                plt.title(
                    f"BACKTESTING: ARIMA {model.order} on {s_ticker} - {ns_parser.n_days} days prediction"
                )
            else:
                plt.title(
                    f"ARIMA {model.order} on {s_ticker} - {ns_parser.n_days} days prediction"
                )
        plt.xlim(df_stock.index[0],
                 get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally because its keyword name
        # differs between Matplotlib releases (b= vs. visible=).
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True,
                 which="minor",
                 color="#999999",
                 linestyle="-",
                 alpha=0.2)
        # Dashed connector from the last known price to the first prediction.
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [df_stock["5. adjusted close"].values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(df_stock.index[-1],
                    df_pred.index[-1],
                    facecolor="tab:orange",
                    alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(df_stock.index[-1],
                   ymin,
                   ymax,
                   linewidth=1,
                   linestyle="--",
                   color="k")

        # BACKTESTING: overlay the real data that followed the end date.
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    df_stock["5. adjusted close"].values[-1],
                    df_future["5. adjusted close"].values[0],
                ],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING: second figure with real vs. predicted prices (top) and
        # the percentage error between them (bottom).
        if ns_parser.s_end_date:
            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index,
                        df_future["5. adjusted close"],
                        c="tab:blue",
                        lw=3)
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    df_stock["5. adjusted close"].values[-1],
                    df_future["5. adjusted close"].values[0],
                ],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [df_stock.index[-1], df_pred.index[0]],
                [df_stock["5. adjusted close"].values[-1], df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(df_stock.index[-1],
                     df_pred.index[-1] + datetime.timedelta(days=1))
            plt.xticks(
                [
                    df_stock.index[-1],
                    df_pred.index[-1] + datetime.timedelta(days=1)
                ],
                visible=True,
            )
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True,
                     which="minor",
                     color="#999999",
                     linestyle="-",
                     alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(
                df_future.index,
                100 *
                (df_pred.values - df_future["5. adjusted close"].values) /
                df_future["5. adjusted close"].values,
                lw=2,
                c="red",
            )
            plt.scatter(
                df_future.index,
                100 *
                (df_pred.values - df_future["5. adjusted close"].values) /
                df_future["5. adjusted close"].values,
                c="red",
                lw=5,
            )
            plt.title(
                "BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    0,
                    100 * (df_pred.values[0] -
                           df_future["5. adjusted close"].values[0]) /
                    df_future["5. adjusted close"].values[0],
                ],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(df_stock.index[-1],
                     df_pred.index[-1] + datetime.timedelta(days=1))
            plt.xticks(
                [
                    df_stock.index[-1],
                    df_pred.index[-1] + datetime.timedelta(days=1)
                ],
                visible=True,
            )
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True,
                     which="minor",
                     color="#999999",
                     linestyle="-",
                     alpha=0.2)
            # NOTE(review): these legend entries are attached to the zero line
            # and the error curve of this subplot — confirm the labels.
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = df_future["5. adjusted close"]

            if gtff.USE_COLOR:

                patch_pandas_text_adjustment()

                print("Time         Real [$]  x  Prediction [$]")
                print(
                    df_pred.apply(price_prediction_backtesting_color,
                                  axis=1).to_string())
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values,
                                  df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred,
                                    df_stock["5. adjusted close"].values[-1])
        print("")

    except Exception as e:
        # Command boundary: report the problem to the user instead of crashing.
        print(e, "\n")
Пример #8
0
# Partial autocorrelation plot of the first-differenced close, skipping the
# first two rows (presumably the NaN rows created by differencing — confirm).
fig = sm.graphics.tsa.plot_pacf(df["Close First Difference"].iloc[2:],
                                lags=40,
                                ax=ax2)
plt.title("Close First Difference")
plt.show()

# Split the data into training and testing sets: the last 30 rows are used
# for testing, everything before them for training.
print(df.shape)
train = df.iloc[:-30]
test = df.iloc[-30:]
print("train and test shape:", train.shape, test.shape)

# Fit an ARIMA(2, 1, 2) model on the training "close" column.
model = ARIMA(train["close"], order=(2, 1, 2))
model = model.fit()
print("Model summary for training set:", model.summary())

# Predict over the positional range covered by the test set so that the
# prediction can be compared with the testing dataset.
start = len(train)
end = len(train) + len(test) - 1
# NOTE(review): whether `typ="levels"` is honoured depends on which ARIMA
# implementation is imported — confirm.
prediction = model.predict(start=start, end=end,
                           typ="levels").rename("ARIMA Predictions")
# Relabel the predictions with the matching slice of df's index. This line is
# active; it only matters when the predicted values lack date index values.
prediction.index = df.index[start:end + 1]  # comment out if not needed
# Plot the predicted values against the test set.
plt.title("Prediction vs Testing Set")
test["close"].plot(legend=True)
prediction.plot(legend=True)
plt.show()
Пример #9
0
# Log returns: log(p_t) - log(p_{t-1}) for every column of data_price.
data = np.log(data_price) - np.log(data_price.shift(1))

# Apply the project's scaler_timeseries helper column by column.
scaled_data = pd.DataFrame()
for i in data.columns:
    scaled_data[i] = scaler_timeseries(data[i])

# Series and model orders used below: "DJI" as the index, "BTC" as the crypto.
scaled_index = scaled_data["DJI"]
scaled_crypto = scaled_data["BTC"]
order_index = (1, 0, 0)
order_crypto = (0, 0, 0)
lag = 6

#def models_charts(scaled_index, scaled_crypto, order_index, order_crypto, lag):

# Mean model for the index, then a GARCH model on its residuals.
modelo_index = ARIMA(scaled_index, order=order_index).fit()
print(modelo_index.summary())
modelo_index_garch = arch.arch_model(modelo_index.resid.dropna(),
                                     vol="GARCH").fit()
print(modelo_index_garch.summary())
# Squared conditional volatility of the index GARCH fit.
cond_var_index = modelo_index_garch.conditional_volatility**2

# Exogenous regressors for the crypto model: the index series and its ARIMA
# residuals, shifted by -lag rows.
# NOTE(review): the original note mentions a lag of 2 while `lag` is 6, and a
# negative shift moves later values to earlier rows — confirm the intent.
exog_crypto = pd.concat([scaled_index, modelo_index.resid],
                        axis=1).shift(-1 * lag)  # original note: "lag 2 due to causality"
exog_crypto.columns = ["Index", "Index resid"]
# Align regressors and target, dropping every row with a missing value.
dados_crypto = pd.concat([exog_crypto, scaled_crypto],
                         axis=1).dropna(how='any')
# Crypto mean model with the shifted index columns as exogenous variables.
modelo_crypto = ARIMA(endog=dados_crypto[scaled_crypto.name],
                      exog=dados_crypto[["Index", "Index resid"]],
                      order=order_crypto).fit()
print(modelo_crypto.summary())
modelo_crypto_garch = arch.arch_model(modelo_crypto.resid.dropna(),
Пример #10
0
def arima(l_args, s_ticker, df_stock):
    """
    Fit an ARIMA model on the adjusted close prices, then plot and print the forecast.

    With ``-o/--order`` a fixed-order ``ARIMA`` is fitted; otherwise the order
    is selected by ``pmdarima.auto_arima`` (optionally seasonal with m=5).
    Any exception is caught and printed instead of being raised.

    Parameters
    ----------
    l_args : list of str
        Command-line style arguments for this command.
    s_ticker : str
        Ticker, used in the plot title.
    df_stock : pd.DataFrame
        Price data; must hold a "5. adjusted close" column. Its index is used
        for the x axis and as the starting point of the forecast dates.
    """
    parser = argparse.ArgumentParser(
        prog="arima",
        description="""
            In statistics and econometrics, and in particular in time series analysis, an
            autoregressive integrated moving average (ARIMA) model is a generalization of an
            autoregressive moving average (ARMA) model. Both of these models are fitted to time
            series data either to better understand the data or to predict future points in the
            series (forecasting). ARIMA(p,d,q) where parameters p, d, and q are non-negative
            integers, p is the order (number of time lags) of the autoregressive model, d is the
            degree of differencing (the number of times the data have had past values subtracted),
            and q is the order of the moving-average model.
        """,
    )

    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i",
        "--ic",
        action="store",
        dest="s_ic",
        type=str,
        default="aic",
        choices=["aic", "aicc", "bic", "hqic", "oob"],
        help="information criteria.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store_true",
        default=False,
        dest="b_seasonal",
        help="Use weekly seasonal data.",
    )
    parser.add_argument(
        "-o",
        "--order",
        action="store",
        dest="s_order",
        type=str,
        help="arima model order (p,d,q) in format: pdq.",
    )
    parser.add_argument(
        "-r",
        "--results",
        action="store_true",
        dest="b_results",
        default=False,
        help="results about ARIMA summary flag.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        # Nothing to do when parsing did not produce a namespace.
        if not ns_parser:
            return

        prices = df_stock["5. adjusted close"]

        # Machine Learning model
        if ns_parser.s_order:
            # Accept the documented compact form ("110") and, additionally,
            # the comma-separated form ("1,1,0"), which also allows orders
            # with more than one digit.
            s_order = ns_parser.s_order
            order_terms = s_order.split(",") if "," in s_order else list(s_order)
            t_order = tuple(int(term) for term in order_terms)
            model = ARIMA(prices.values, order=t_order).fit()
            # Observations are zero-indexed, so the first out-of-sample step
            # sits at index n_obs; starting one later would skip tomorrow's
            # forecast and misalign the values with l_pred_days below.
            n_obs = len(prices)
            l_predictions = model.predict(
                start=n_obs,
                end=n_obs + ns_parser.n_days - 1,
            )
        else:
            # pmdarima names this keyword `information_criterion`; under any
            # other name the --ic choice would not drive model selection.
            if ns_parser.b_seasonal:
                model = pmdarima.auto_arima(
                    prices.values,
                    error_action="ignore",
                    seasonal=True,
                    m=5,
                    information_criterion=ns_parser.s_ic,
                )
            else:
                model = pmdarima.auto_arima(
                    prices.values,
                    error_action="ignore",
                    seasonal=False,
                    information_criterion=ns_parser.s_ic,
                )
            l_predictions = model.predict(n_periods=ns_parser.n_days)

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=prices.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        if ns_parser.b_results:
            print(model.summary())
            print("")

        # Plotting
        plt.figure()
        plt.plot(df_stock.index, prices, lw=2)
        if ns_parser.s_order:
            plt.title(
                f"ARIMA {str(t_order)} on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"ARIMA {model.order} on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        plt.xlim(
            df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1]
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally because its keyword name
        # differs between Matplotlib releases (b= vs. visible=).
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector from the last known price to the first prediction.
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [prices.values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(
            df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2
        )
        _, _, ymin, ymax = plt.axis()
        plt.vlines(
            df_stock.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k"
        )
        plt.ion()
        plt.show()

        # Print prediction data
        print_pretty_prediction(df_pred, prices.values[-1])
        print("")

    except Exception as e:
        # Command boundary: report the problem to the user instead of crashing.
        print(e)
        print("")
Пример #11
0
)
plt.xlabel('Date')
plt.ylabel('Nombre de vélos')
plt.title(
    'Nombre de vélos par jour entre 00h00 et 09h00 au totem d\'Albert 1er')
plt.legend()
plt.show()

# Use the auto_arima algorithm to search for suitable orders for the ARIMA
# model. `summary` is a method: it has to be called, and its result printed,
# for anything to be shown when this runs as a script.
stepwise_fit = auto_arima(totem['Count'], trace=True, seasonal=True)
print(stepwise_fit.summary())

# Build the ARIMA model
model = ARIMA(totem['Count'], order=(2, 1, 1))
model = model.fit()
print(model.summary())

# The last index of the prediction corresponds to the day we want the
# prediction for.
# NOTE(review): the keyword is spelled `type` here; confirm it is accepted
# (and has an effect) with the ARIMA implementation in use.
pred = model.predict(end=len(totem) + 1, type="levels").rename(
    'Prediction ARIMA'
)

# Plot the prediction curve
pred.plot(legend=True)
plt.xlabel('Date')
plt.ylabel('Nombre de vélos')
plt.title(
    'Nombre de vélos par jour entre 00h00 et 09h00 au totem d\'Albert 1er')
plt.legend()
plt.show()
# Call tail() so the last predictions are printed rather than the bound method.
print(pred.tail())
Пример #12
0
# Collect, per column of `data`, the ARIMA residuals and the GARCH conditional
# volatilities of the previously fitted models, indexed by the non-missing
# dates of that column.
resid_frame = pd.DataFrame(columns = data.columns)
conditional_volatilities_stocks_frame = pd.DataFrame(columns=data.columns)
for i in data.columns:
    resid_frame[i] = pd.Series(modelos[i].arima_res_.resid, index= data[i].dropna().index)
    conditional_volatilities_stocks_frame[i] = pd.Series(modelos_garch[i].conditional_volatility, index= data[i].dropna().index)



# INDEX DATA
# Download daily ^DJI data, fill gaps forward then backward, and compute the
# log returns of the close.
# NOTE(review): fillna(method=...) is deprecated in recent pandas — confirm the pinned version.
indice = yf.download("^DJI", start="2002-01-01",interval="1d", group_by='ticker', auto_adjust=True)
indice = indice.fillna(method="ffill").fillna(method="bfill")
indice_ret = (np.log(indice["Close"]) - np.log(indice["Close"].shift(1))).dropna()


# AR(1) model of the index returns with the stock residuals as exogenous
# regressors, followed by a GARCH model on its residuals.
indice_modelo = ARIMA(endog=indice_ret, exog=resid_frame, order=(1,0,0)).fit()
print(indice_modelo.summary())
indice_garch = arch.arch_model(indice_modelo.resid, vol = "GARCH", rescale=True).fit()
print(indice_garch.summary())
# Despite its name this holds the conditional volatility; it is squared below.
cond_var_index = indice_garch.conditional_volatility

# Accumulate coefficient^2 * volatility^2 over all stocks, printing the running
# total after each one, then add the squared index volatility.
h_stocks = pd.Series(0,name="total_vol", index=resid_frame.index)
for i in resid_frame.columns:
    h_stocks = h_stocks + indice_modelo.params[i]**2*conditional_volatilities_stocks_frame[i]**2
    print(f"{i} h_stocks {h_stocks}")
h = h_stocks + cond_var_index**2

# Per-stock ratio: coefficient * stock volatility divided by sqrt(h).
spillovers = pd.DataFrame(columns=resid_frame.columns)
for i in resid_frame.columns:
    spillovers[i] = (indice_modelo.params[i]*conditional_volatilities_stocks_frame[i])/(h**0.5)
for i in spillovers.columns:
Пример #13
0
class ARIMAModel(ModelStrategy):
    '''
    A class for an Autoregressive Integrated Moving Average Model and the standard operations on it
    '''

    # Mapping from the incoming column names to the names used internally
    _COLUMN_MAP = {'Date': 'ds', 'Consumption': 'y'}

    def __init__(self, hparams, log_dir=None):
        '''
        :param hparams: Dict of hyperparameters. 'AUTO_PARAMS' is required; 'P', 'D' and 'Q'
                        default to 30, 0 and 0 respectively
        :param log_dir: Forwarded to the ModelStrategy constructor
        '''
        univariate = True
        model = None
        name = 'ARIMA'
        self.auto_params = hparams['AUTO_PARAMS']
        self.p = int(hparams.get('P', 30))
        self.d = int(hparams.get('D', 0))
        self.q = int(hparams.get('Q', 0))
        super().__init__(model, univariate, name, log_dir=log_dir)

    def fit(self, dataset):
        '''
        Fits an ARIMA forecasting model
        :param dataset: A Pandas DataFrame with 2 columns: Date and Consumption
        :raises ValueError: If the dataset does not have exactly 2 columns
        '''
        if dataset.shape[1] != 2:
            raise ValueError(
                'Univariate models cannot fit with datasets with more than 1 feature.'
            )
        # Rename on a copy so the caller's DataFrame is left untouched
        series = dataset.rename(columns=self._COLUMN_MAP).set_index('ds')
        if self.auto_params:
            # Search for the order, with bounds derived from the configured p, d, q
            best_model = pmdarima.auto_arima(series,
                                             seasonal=False,
                                             stationary=False,
                                             information_criterion='aic',
                                             max_order=2 * (self.p + self.q),
                                             max_p=2 * self.p,
                                             max_d=2 * self.d,
                                             max_q=2 * self.q,
                                             error_action='ignore')
            order = best_model.order
            print("Best ARIMA params: (p, d, q):", best_model.order)
        else:
            order = (self.p, self.d, self.q)
        self.model = ARIMA(series, order=order).fit()
        print(self.model.summary())
        return

    def evaluate(self, train_set, test_set, save_dir=None, plot=False):
        '''
        Evaluates performance of ARIMA model on test set
        :param train_set: A Pandas DataFrame with 2 columns: Date and Consumption
        :param test_set: A Pandas DataFrame with 2 columns: Date and Consumption
        :param save_dir: Directory in which to save forecast metrics
        :param plot: Flag indicating whether to plot the forecast evaluation
        :return: The metrics returned by evaluate_forecast
        '''
        # Imported locally so this method does not depend on a module-level alias
        import pandas as pd

        # Rename on copies so the caller's DataFrames are left untouched
        train_set = train_set.rename(columns=self._COLUMN_MAP).set_index('ds')
        test_set = test_set.rename(columns=self._COLUMN_MAP).set_index('ds')
        train_set["model"] = self.model.fittedvalues
        test_set["forecast"] = self.forecast(
            test_set.shape[0])['Consumption'].tolist()

        # DataFrame.append was removed in pandas 2.0; concat stacks the rows the same way
        df_forecast = pd.concat([train_set,
                                 test_set]).rename(columns={'y': 'gt'})
        test_metrics = self.evaluate_forecast(df_forecast,
                                              save_dir=save_dir,
                                              plot=plot)
        return test_metrics

    def forecast(self, days, recent_data=None):
        '''
        Create a forecast for the test set. Note that this is different than obtaining predictions for the test set.
        The model makes a prediction for the provided example, then uses the result for the next prediction.
        Repeat this process for a specified number of days.
        :param days: Number of days into the future to produce a forecast for
        :param recent_data: A factual example for the first prediction (not used by this model)
        :return: A DataFrame with columns Date and Consumption
        '''
        forecast_df = self.model.forecast(steps=days).reset_index(level=0)
        forecast_df.columns = ['Date', 'Consumption']
        return forecast_df

    def save(self, save_dir, scaler_dir=None):
        '''
        Saves the model to disk; does nothing if no model has been set
        :param save_dir: Directory in which to save the model
        :param scaler_dir: Not used by this model
        '''
        if self.model:
            model_path = os.path.join(save_dir,
                                      self.name + self.train_date + '.pkl')
            self.model.save(model_path)  # Serialize and save the model object

    def load(self, model_path, scaler_path=None):
        '''
        Loads the model from disk
        :param model_path: Path to saved model; must end in ".pkl"
        :param scaler_path: Not used by this model
        :raises ValueError: If the path does not have a ".pkl" extension
        '''
        if os.path.splitext(model_path)[1] != '.pkl':
            raise ValueError('Model file path for ' + self.name +
                             ' must have ".pkl" extension.')
        # NOTE: the saved file is a pickle; only load files from a trusted source
        self.model = ARIMAResults.load(model_path)
        return
            plt.savefig(f"images/{company}/{event}/{company} Autocorrelation, {d} Diffs Applied, before {event}", bbox_inches='tight')
            plt.clf()
        else:
            plt.show()

        #########################################

        # forecast with ARIMA model and perform statistical analysis
        #########################################

        if not run_forecast:
            continue

        model = ARIMA(y1, order=ARIMA_orders[j][i])
        model = model.fit()
        print(model.summary())

        # plot residuals
        # normal residual plot
        residuals = model.resid[1:] # remove first residual because it is huge and messes with scale of plot
        fig, ax = plt.subplots(1,2)
        ax[0].plot(x1[1:], residuals)
        ax[0].set_ylabel('Stock Price ($)', color='purple')
        ax[0].set_xlabel('Date', color='purple')
        plt.sca(ax[0])
        plt.xticks(x1[1::len(x1)//5], rotation=30)
        plt.title('Residuals', color='purple')
        # density plot
        plt.sca(ax[1])
        residuals.plot(kind='density', ax=ax[1]) # pandas series/dataframe function that plots each column separately
        plt.ylabel('Density', color='purple')
# estimators.

# #### `SARIMAX`


def print_params(s):
    """Return the second table of a summary object as a DataFrame.

    The table at ``s.tables[1]`` is exported to CSV text and re-read with
    pandas, using the first CSV column as the index.
    (Presumably this is the coefficient table of a statsmodels summary.)
    """
    from io import StringIO

    table = s.tables[1]
    csv_buffer = StringIO(table.as_csv())
    return pd.read_csv(csv_buffer, index_col=0)


print_params(sarimax_exog_res.summary())

# #### `ARIMA`

print_params(arima_exog_res.summary())

# ### `exog` in `AutoReg`
#
# When using `AutoReg` to estimate a model using OLS, the model differs
# from both `SARIMAX` and `ARIMA`. The `AutoReg` specification with
# exogenous variables is
#
# $$
# \begin{align}
# Y_t & = \phi + \rho Y_{t-1} + X_{t}\beta + \eta_t \\
# \eta_t & \sim WN(0,\sigma^2) \\
# \end{align}
# $$
#
# This specification is not equivalent to the specification estimated in
Пример #16
0
 def fit_model(self, train_data, namefile):
     """Fit an ARIMA(2, 1, 3) model on ``train_data`` and save the result.

     :param train_data: Data handed to ``ARIMA`` as the series to model
     :param namefile: Output path without extension; '.pickle' is appended
     :return: The fitted results object
     """
     spec = ARIMA(train_data, order=(2, 1, 3), enforce_stationarity=True)
     fitted = spec.fit()
     fitted.summary()  # summary is built but neither printed nor returned
     output_path = namefile + '.pickle'
     fitted.save(output_path)
     return fitted
Пример #17
0
def arima(l_args, s_ticker, s_interval, df_stock):
    """Fit an ARIMA model on adjusted close prices, then plot and print a forecast.

    Command-line style options are parsed from ``l_args``:
    ``-d/--days`` (forecast horizon, default 5), ``-i/--ic`` (information
    criterion for the automatic search), ``-s/--seasonal`` (seasonal search
    with period 5), ``-o/--order`` (explicit order given as three digits
    ``pdq``) and ``-r/--results`` (print the model summary).

    When ``--order`` is given, a fixed-order ``ARIMA`` model is fitted;
    otherwise ``pmdarima.auto_arima`` selects the order.

    :param l_args: List of argument strings to parse
    :param s_ticker: Ticker symbol, used only in the plot title
    :param s_interval: Not used by this function
    :param df_stock: DataFrame with a '5. adjusted close' column; its index
        supplies the x-axis and the last traded day
    :return: None. Results are plotted and printed. Errors are reported on
        stdout instead of being raised.
    """
    parser = argparse.ArgumentParser(
        prog='arima',
        description="""In statistics and econometrics, and in particular in time
                                     series analysis, an autoregressive integrated moving average (ARIMA) model
                                     is a generalization of an autoregressive moving average (ARMA) model. Both
                                     of these models are fitted to time series data either to better understand
                                     the data or to predict future points in the series (forecasting).
                                     ARIMA(p,d,q) where parameters p, d, and q are non-negative integers, p is
                                     the order (number of time lags) of the autoregressive model, d is the degree
                                     of differencing (the number of times the data have had past values subtracted),
                                     and q is the order of the moving-average model."""
    )

    parser.add_argument('-d',
                        "--days",
                        action="store",
                        dest="n_days",
                        type=check_positive,
                        default=5,
                        help='prediction days.')
    parser.add_argument('-i',
                        "--ic",
                        action="store",
                        dest="s_ic",
                        type=str,
                        default='aic',
                        choices=['aic', 'aicc', 'bic', 'hqic', 'oob'],
                        help='information criteria.')
    parser.add_argument('-s',
                        "--seasonal",
                        action="store_true",
                        default=False,
                        dest="b_seasonal",
                        help='Use weekly seasonal data.')
    parser.add_argument('-o',
                        "--order",
                        action="store",
                        dest="s_order",
                        type=str,
                        help='arima model order (p,d,q) in format: pdq.')
    parser.add_argument('-r',
                        "--results",
                        action="store_true",
                        dest="b_results",
                        default=False,
                        help='results about ARIMA summary flag.')

    try:
        (ns_parser, l_unknown_args) = parser.parse_known_args(l_args)

        if l_unknown_args:
            print(
                f"The following args couldn't be interpreted: {l_unknown_args}\n"
            )
            return

        # Look the price column up once instead of on every use.
        s_close = df_stock['5. adjusted close']
        n_obs = len(s_close)

        # Machine Learning model
        if ns_parser.s_order:
            # One digit per component, e.g. "212" -> (2, 1, 2).
            t_order = tuple(int(c_digit) for c_digit in ns_parser.s_order)
            model = ARIMA(s_close.values, order=t_order).fit()
            # NOTE(review): start=n_obs + 1 looks one step past the first
            # out-of-sample point if predict() is zero-indexed; confirm
            # against the ARIMA class this file imports before changing it.
            l_predictions = model.predict(start=n_obs + 1,
                                          end=n_obs + ns_parser.n_days)
        else:
            # The keyword is `information_criterion`; the misspelt
            # `information_criteria` never reached the order search.
            model = pmdarima.auto_arima(
                s_close.values,
                error_action='ignore',
                seasonal=ns_parser.b_seasonal,
                m=5 if ns_parser.b_seasonal else 1,
                information_criterion=ns_parser.s_ic)
            l_predictions = model.predict(n_periods=ns_parser.n_days)

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=s_close.index[-1],
            n_next_days=ns_parser.n_days)
        df_pred = pd.Series(l_predictions, index=l_pred_days, name='Price')

        if ns_parser.b_results:
            print(model.summary())
            print("")

        # Plotting
        plt.plot(df_stock.index, s_close, lw=2)
        t_shown_order = t_order if ns_parser.s_order else model.order
        plt.title(
            f"ARIMA {t_shown_order} on {s_ticker} - {ns_parser.n_days} days prediction"
        )
        plt.xlim(df_stock.index[0],
                 get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel('Time')
        plt.ylabel('Share Price ($)')
        # Pass the on/off flag positionally: the `b=` keyword was renamed to
        # `visible=` in newer Matplotlib, positional works with both.
        plt.grid(True, which='major', color='#666666', linestyle='-')
        plt.minorticks_on()
        plt.grid(True,
                 which='minor',
                 color='#999999',
                 linestyle='-',
                 alpha=0.2)
        # Dashed connector from the last observed price to the first forecast.
        plt.plot([df_stock.index[-1], df_pred.index[0]],
                 [s_close.values[-1], df_pred.values[0]],
                 lw=1,
                 c='tab:green',
                 linestyle='--')
        plt.plot(df_pred.index, df_pred, lw=2, c='tab:green')
        plt.axvspan(df_stock.index[-1],
                    df_pred.index[-1],
                    facecolor='tab:orange',
                    alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(df_stock.index[-1],
                   ymin,
                   ymax,
                   linewidth=1,
                   linestyle='--',
                   color='k')
        plt.show()

        # Print prediction data
        print("Predicted share price:")
        df_pred = df_pred.apply(lambda x: f"{x:.2f} $")
        print(df_pred.to_string())
        print("")

    except SystemExit:
        # argparse raises SystemExit for -h and for invalid option values;
        # keep swallowing it so the caller is not terminated.
        print("")
    except Exception as e:
        # Report the failure instead of hiding it behind an empty line.
        print(e)
        print("")
Пример #18
0
import pandas as pd
from matplotlib import pyplot
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import warnings

# Suppress UserWarning output for the rest of the script.
warnings.simplefilter('ignore', category=UserWarning)

dataset = "Paleo"

# `squeeze=True` was dropped: the data is used below as a multi-column
# DataFrame (`.iloc[:, 11:17]`, `.columns`), so it never applied, and the
# keyword no longer exists in pandas 2.0+.
series = pd.read_csv(f'{dataset}.csv',
                     header=0,
                     index_col=0,
                     parse_dates=True)

# Fit and plot one ARIMA(1, 0, 1) model for each column at positions 11..16.
for header in series.iloc[:, 11:17]:
    column = series[header]
    arima_model = ARIMA(column, order=(1, 0, 1)).fit()
    pred = arima_model.predict(dynamic=False)
    # Take the square root explicitly; the `squared=False` flag is
    # deprecated/removed in recent scikit-learn releases.
    rmse = mean_squared_error(column, pred) ** 0.5

    print(arima_model.summary())
    print(f"RMSE = {rmse}")

    # Observed values in the default colour, model predictions in red.
    pyplot.plot(column)
    pyplot.plot(pred, color='red')
    pyplot.title(f'{header} intake over time ({dataset})')
    pyplot.xlabel('Date')
    pyplot.ylabel(f'{header}')
    pyplot.show()