def arima(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """Fit an ARIMA model on the adjusted close price and show the forecast.

    When an explicit order is given (``-o``) that ARIMA(p,d,q) model is fitted
    directly; otherwise ``pmdarima.auto_arima`` selects the order. When an end
    date is given (``-e``) the data is cut at that date and the forecast is
    compared against the prices that actually followed (backtesting).

    Any exception raised while parsing, fitting or plotting is printed and
    swallowed, so the function never raises to its caller.

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        ticker
    df_stock: pd.DataFrame
        Dataframe of prices; the "Adj Close" column is the modelled series
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="arima",
        description="""
            In statistics and econometrics, and in particular in time series analysis, an
            autoregressive integrated moving average (ARIMA) model is a generalization of an
            autoregressive moving average (ARMA) model. Both of these models are fitted to time
            series data either to better understand the data or to predict future points in the
            series (forecasting). ARIMA(p,d,q) where parameters p, d, and q are non-negative
            integers, p is the order (number of time lags) of the autoregressive model, d is the
            degree of differencing (the number of times the data have had past values subtracted),
            and q is the order of the moving-average model.
        """,
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i",
        "--ic",
        action="store",
        dest="s_ic",
        type=str,
        default="aic",
        choices=["aic", "aicc", "bic", "hqic", "oob"],
        help="information criteria.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store_true",
        default=False,
        dest="b_seasonal",
        help="Use weekly seasonal data.",
    )
    parser.add_argument(
        "-o",
        "--order",
        action="store",
        dest="s_order",
        type=str,
        help="arima model order (p,d,q) in format: p,d,q.",
    )
    parser.add_argument(
        "-r",
        "--results",
        action="store_true",
        dest="b_results",
        default=False,
        help="results about ARIMA summary flag.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        n_days = ns_parser.n_days
        backtesting = bool(ns_parser.s_end_date)

        # BACKTESTING: validate the end date, then split the data into the
        # training part (df_stock) and the part to compare against (df_future)
        if backtesting:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            earliest_end = get_next_stock_market_days(
                last_stock_day=df_stock.index[0], n_next_days=5 + n_days
            )[-1]
            if ns_parser.s_end_date < earliest_end:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date, n_next_days=n_days
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            df_future = df_stock[future_index[0] : future_index[-1]]
            df_stock = df_stock[: ns_parser.s_end_date]

        adj_close = df_stock["Adj Close"]
        last_day = df_stock.index[-1]
        last_price = adj_close.values[-1]

        # Machine Learning model
        if ns_parser.s_order:
            t_order = tuple(int(term) for term in ns_parser.s_order.split(","))
            model = ARIMA(adj_close.values, order=t_order).fit()
            # predict() indices are zero-based, so observation len(series) is
            # the first out-of-sample step; this yields n_days forecasts that
            # line up with the next n_days market days.
            n_obs = len(adj_close)
            l_predictions = model.predict(start=n_obs, end=n_obs + n_days - 1)
        else:
            # The seasonal period m is only supplied for the seasonal search.
            seasonal_kwargs = {"m": 5} if ns_parser.b_seasonal else {}
            model = pmdarima.auto_arima(
                adj_close.values,
                error_action="ignore",
                seasonal=ns_parser.b_seasonal,
                # auto_arima's keyword is `information_criterion`; any other
                # spelling is treated as a fit argument and the user's choice
                # of criterion would not reach the model search.
                information_criterion=ns_parser.s_ic,
                **seasonal_kwargs,
            )
            # Prices cannot be negative, so clamp the forecast at zero.
            l_predictions = [
                i if i > 0 else 0 for i in model.predict(n_periods=n_days)
            ]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=adj_close.index[-1],
            n_next_days=n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        if ns_parser.b_results:
            print(model.summary())
            print("")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, adj_close, lw=2)

        order_label = str(t_order) if ns_parser.s_order else model.order
        title_prefix = "BACKTESTING: " if backtesting else ""
        plt.title(
            f"{title_prefix}ARIMA {order_label} on {s_ticker} - {n_days} days prediction"
        )

        plt.xlim(
            df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1]
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # Visibility is passed positionally: the `b` keyword was renamed to
        # `visible` in newer Matplotlib, positional works on every version.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last known price and the first forecast
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_price, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(last_day, df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(last_day, ymin, ymax, linewidth=1, linestyle="--", color="k")

        # BACKTESTING: overlay the real prices of the forecast window
        if backtesting:
            plt.plot(
                df_future.index,
                df_future["Adj Close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [last_day, df_future.index[0]],
                [last_price, df_future["Adj Close"].values[0]],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING: second figure with real-vs-predicted and the % error
        if backtesting:
            real = df_future["Adj Close"]
            x_right = df_pred.index[-1] + datetime.timedelta(days=1)
            pct_error = 100 * (df_pred.values - real.values) / real.values

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            plt.plot(df_future.index, real, lw=2, c="tab:blue", ls="--")
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, real, c="tab:blue", lw=3)
            plt.plot(
                [last_day, df_future.index[0]],
                [last_price, real.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_day, df_pred.index[0]],
                [last_price, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(last_day, x_right)
            plt.xticks([last_day, x_right], visible=True)
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            # The upper panel shares its time axis visually with the lower
            # one, so its tick labels are removed.
            plt.xticks([])

            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(df_future.index, pct_error, lw=2, c="red")
            plt.scatter(df_future.index, pct_error, c="red", lw=5)
            plt.title("BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [last_day, df_future.index[0]],
                [
                    0,
                    100
                    * (df_pred.values[0] - real.values[0])
                    / real.values[0],
                ],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(last_day, x_right)
            plt.xticks([last_day, x_right], visible=True)
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = df_future["Adj Close"]

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time Real [$] x Prediction [$]")
                print(
                    df_pred.apply(
                        price_prediction_backtesting_color, axis=1
                    ).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, last_price)
        print("")

    except Exception as e:
        # Command boundary: report the problem instead of crashing the caller.
        print(e, "\n")
def display_exponential_smoothing(
    ticker: str,
    values: Union[pd.DataFrame, pd.Series],
    n_predict: int,
    trend: str = "N",
    seasonal: str = "N",
    seasonal_periods: int = 5,
    s_end_date: str = "",
    export: str = "",
    time_res: str = "",
):
    """Fit an exponential smoothing model, plot the forecast and print it.

    When ``s_end_date`` is given the data is cut at that date and the forecast
    is compared against the values that actually followed (backtesting).
    Returns early, after printing a message, when the backtesting window
    reaches into the future, when no forecast was produced, or when the
    forecast contains NaN.

    Parameters
    ----------
    ticker : str
        Dataset being smoothed
    values : Union[pd.DataFrame, pd.Series]
        Raw data
    n_predict : int
        Days to predict
    trend : str, optional
        Trend variable, by default "N"
    seasonal : str, optional
        Seasonal variable, by default "N"
    seasonal_periods : int, optional
        Number of seasonal periods, by default 5
    s_end_date : str, optional
        End date for backtesting, by default ""
    export : str, optional
        Format to export data, by default ""
    time_res : str
        Resolution for data, allowing for predicting outside of standard market days
    """
    if s_end_date:
        if not time_res:
            future_index = get_next_stock_market_days(
                last_stock_day=s_end_date, n_next_days=n_predict
            )
        else:
            # date_range includes s_end_date itself, hence one extra period
            # that is dropped again with [1:].
            future_index = pd.date_range(
                s_end_date, periods=n_predict + 1, freq=time_res
            )[1:]

        if future_index[-1] > datetime.datetime.now():
            console.print(
                "Backtesting not allowed, since End Date + Prediction days is in the future\n"
            )
            return

        df_future = values[future_index[0] : future_index[-1]]
        values = values[:s_end_date]  # type: ignore

    # Get ETS model
    model, title, forecast = ets_model.get_exponential_smoothing_model(
        values, trend, seasonal, seasonal_periods, n_predict
    )

    if not forecast:
        console.print("No forecast made. Model did not converge.\n")
        return

    if np.isnan(forecast).any():
        console.print("Model predicted NaN values. Runtime Error.\n")
        return

    if not time_res:
        l_pred_days = get_next_stock_market_days(
            last_stock_day=values.index[-1],
            n_next_days=n_predict,
        )
    else:
        l_pred_days = pd.date_range(
            values.index[-1], periods=n_predict + 1, freq=time_res
        )[1:]

    df_pred = pd.Series(forecast, index=l_pred_days, name="Price")

    console.print(f"\n{title}")
    console.print("\nFit model parameters:")
    for key, value in model.params.items():
        console.print(f"{key} {' '*(18-len(key))}: {value}")

    console.print("\nAssess fit model:")
    console.print(f"AIC: {round(model.aic, 2)}")
    console.print(f"BIC: {round(model.bic, 2)}")
    console.print(f"SSE: {round(model.sse, 2)}\n")

    last_day = values.index[-1]
    last_value = values.values[-1]

    # Plotting
    fig, ax = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    ax.plot(values.index, values.values, lw=2)
    # BACKTESTING
    if s_end_date:
        ax.set_title(f"BACKTESTING: {title} on {ticker}")
    else:
        ax.set_title(f"{title} on {ticker}")

    ax.set_xlim(
        values.index[0],
        get_next_stock_market_days(df_pred.index[-1], 1)[-1],
    )
    ax.set_xlabel("Time")
    ax.set_ylabel("Share Price ($)")
    # Visibility is passed positionally: the `b` keyword was renamed to
    # `visible` in newer Matplotlib, positional works on every version.
    ax.grid(True, which="major", color="#666666", linestyle="-")
    ax.minorticks_on()
    ax.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
    # Dashed connector between the last known value and the first forecast
    ax.plot(
        [last_day, df_pred.index[0]],
        [last_value, df_pred.values[0]],
        lw=1,
        c="tab:green",
        linestyle="--",
    )
    ax.plot(df_pred.index, df_pred, lw=2, c="tab:green")
    ax.axvspan(
        last_day,
        df_pred.index[-1],
        facecolor="tab:orange",
        alpha=0.2,
    )
    # Read the limits from the axis being drawn on, not from pyplot state.
    ymin, ymax = ax.get_ylim()
    ax.vlines(
        last_day,
        ymin,
        ymax,
        linewidth=1,
        linestyle="--",
        color="k",
    )
    date_fmt = mdates.DateFormatter("%m/%d/%Y")
    ax.xaxis.set_major_formatter(date_fmt)
    ax.tick_params(axis="x", labelrotation=45)

    # BACKTESTING: overlay the real values of the forecast window
    if s_end_date:
        ax.plot(
            df_future.index,
            df_future,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax.plot(
            [last_day, df_future.index[0]],
            [last_value, df_future.values[0]],
            lw=1,
            c="tab:blue",
            linestyle="--",
        )

    if gtff.USE_ION:
        plt.ion()

    fig.tight_layout()
    plt.show()

    # BACKTESTING: second figure with real-vs-predicted and the % error
    if s_end_date:
        date_fmt = mdates.DateFormatter("%m-%d")
        x_right = df_pred.index[-1] + datetime.timedelta(days=1)
        pct_error = 100 * (df_pred.values - df_future.values) / df_future.values

        fig, ax = plt.subplots(1, 2, figsize=plot_autoscale(), dpi=PLOT_DPI)
        ax0 = ax[0]
        ax0.plot(
            df_future.index,
            df_future,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.plot(df_pred.index, df_pred, lw=2, c="green")
        ax0.scatter(
            df_future.index,
            df_future,
            c="tab:blue",
            lw=3,
        )
        ax0.plot(
            [last_day, df_future.index[0]],
            [last_value, df_future.values[0]],
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.scatter(df_pred.index, df_pred, c="green", lw=3)
        ax0.plot(
            [last_day, df_pred.index[0]],
            [last_value, df_pred.values[0]],
            lw=2,
            c="green",
            ls="--",
        )
        ax0.set_title("BACKTESTING: Prices")
        ax0.set_xlim(last_day, x_right)
        ax0.set_ylabel("Share Price ($)")
        ax0.grid(True, which="major", color="#666666", linestyle="-")
        ax0.legend(["Real data", "Prediction data"])

        ax1 = ax[1]
        ax1.axhline(y=0, color="k", linestyle="--", linewidth=2)
        ax1.plot(df_future.index, pct_error, lw=2, c="red")
        ax1.scatter(df_future.index, pct_error, c="red", lw=5)
        ax1.set_title("BACKTESTING: % Error")
        ax1.plot(
            [last_day, df_future.index[0]],
            [
                0,
                100
                * (df_pred.values[0] - df_future.values[0])
                / df_future.values[0],
            ],
            lw=2,
            ls="--",
            c="red",
        )
        ax1.set_xlim(last_day, x_right)
        ax1.set_xlabel("Time")
        ax1.set_ylabel("Prediction Error (%)")
        ax1.grid(True, which="major", color="#666666", linestyle="-")
        ax1.legend(["Real data", "Prediction data"])

        ax0.xaxis.set_major_formatter(date_fmt)
        ax0.tick_params(axis="x", labelrotation=45)
        ax1.xaxis.set_major_formatter(date_fmt)
        ax1.tick_params(axis="x", labelrotation=45)

        if gtff.USE_ION:
            plt.ion()

        fig.tight_layout()
        plt.show()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future

        if gtff.USE_COLOR:
            patch_pandas_text_adjustment()

            console.print("Time Real [$] x Prediction [$]")
            console.print(
                df_pred.apply(price_prediction_backtesting_color, axis=1).to_string()
            )
        else:
            console.print(df_pred[["Real", "Prediction"]].round(2).to_string())

        console.print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, last_value)
    export_data(export, os.path.dirname(os.path.abspath(__file__)), "ets")
    console.print("")
def display_regression(
    dataset: str,
    values: Union[pd.Series, pd.DataFrame],
    poly_order: int,
    n_input: int,
    n_predict: int,
    n_jumps: int,
    s_end_date: str = "",
    export: str = "",
    time_res: str = "",
    external_axes: Optional[List[plt.Axes]] = None,
):
    """Display predictions for regression models

    When ``s_end_date`` is given the data is cut at that date and the
    prediction is compared against the values that actually followed
    (backtesting); two additional axes are then drawn.

    Parameters
    ----------
    dataset : str
        Title for data
    values : Union[pd.Series, pd.DataFrame]
        Data to fit
    poly_order : int
        Order of polynomial to fit
    n_input : int
        Length of input sequence
    n_predict : int
        Length of prediction sequence
    n_jumps : int
        Number of jumps in data
    s_end_date : str, optional
        End date of the data used for fitting; enables backtesting
    export : str, optional
        Format for exporting figures
    time_res : str
        Resolution for data, allowing for predicting outside of standard market days
    external_axes : Optional[List[plt.Axes]], optional
        External axes (1 axis is expected in the list, or 3 axes when
        backtesting), by default None
    """
    # BACKTESTING
    if s_end_date:
        if not time_res:
            future_index = get_next_stock_market_days(
                last_stock_day=s_end_date, n_next_days=n_predict
            )
        else:
            # date_range includes s_end_date itself, hence one extra period
            # that is dropped again with [1:].
            future_index = pd.date_range(
                s_end_date, periods=n_predict + 1, freq=time_res
            )[1:]

        df_future = values[future_index[0] : future_index[-1]]  # noqa: E203
        values = values[:s_end_date]  # type: ignore

    l_predictions, _ = regression_model.get_regression_model(
        list(values.values), poly_order, n_input, n_predict, n_jumps
    )

    # Prediction data
    if not time_res:
        l_pred_days = get_next_stock_market_days(
            last_stock_day=values.index[-1],
            n_next_days=n_predict,
        )
    else:
        l_pred_days = pd.date_range(
            values.index[-1], periods=n_predict + 1, freq=time_res
        )[1:]
    df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

    # Plotting
    # This plot has 1 axes
    if external_axes is None:
        _, ax1 = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    else:
        if (not s_end_date and len(external_axes) != 1) or (
            s_end_date and len(external_axes) != 3
        ):
            logger.error("Expected list of 1 axis or 3 axes when backtesting.")
            console.print(
                "[red]Expected list of 1 axis or 3 axes when backtesting.\n[/red]"
            )
            return
        ax1 = external_axes[0]

    last_day = values.index[-1]
    last_value = values.values[-1]

    ax1.plot(values.index, values)
    # BACKTESTING
    if s_end_date:
        ax1.set_title(
            f"BACKTESTING: Regression (polynomial {poly_order}) on {dataset} - {n_predict} step prediction",
            fontsize=12,
        )
    else:
        ax1.set_title(
            f"Regression (polynomial {poly_order}) on {dataset} - {n_predict} step prediction"
        )
    ax1.set_xlim(values.index[0], l_pred_days[-1])
    ax1.set_ylabel("Value")
    # Dashed connector between the last known value and the first prediction
    ax1.plot(
        [last_day, df_pred.index[0]],
        [last_value, df_pred.values[0]],
        color=theme.down_color,
        linestyle="--",
    )
    ax1.plot(df_pred.index, df_pred, color=theme.down_color)
    ax1.axvspan(last_day, df_pred.index[-1], alpha=0.2)
    # Read the limits from ax1 itself: with external axes the pyplot
    # "current axes" is not guaranteed to be the one being drawn on.
    ymin, ymax = ax1.get_ylim()
    ax1.vlines(last_day, ymin, ymax, linestyle="--")

    # BACKTESTING: overlay the real values of the prediction window
    if s_end_date:
        ax1.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax1.plot(
            [last_day, df_future.index[0]],
            [last_value, df_future.values[0]],
            color=theme.up_color,
            linestyle="--",
        )

    theme.style_primary_axis(ax1)

    if external_axes is None:
        theme.visualize_output()

    export_data(export, os.path.dirname(os.path.abspath(__file__)), "regression")
    console.print("")

    # BACKTESTING: real-vs-predicted panel and % error panel
    if s_end_date:
        # This plot has 2 axes
        if external_axes is None:
            _, axes = plt.subplots(
                2, 1, sharex=True, figsize=plot_autoscale(), dpi=PLOT_DPI
            )
            (ax2, ax3) = axes
        else:
            if len(external_axes) != 3:
                logger.error("Expected list of three axis items.")
                console.print("[red]Expected list of 3 axis items.\n[/red]")
                return
            (_, ax2, ax3) = external_axes

        ax2.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax2.plot(df_pred.index, df_pred, color=theme.down_color, marker="o")
        ax2.plot(
            [last_day, df_future.index[0]],
            [last_value, df_future.values[0]],
            color=theme.up_color,
            linestyle="--",
        )
        ax2.plot(
            [last_day, df_pred.index[0]],
            [last_value, df_pred.values[0]],
            color=theme.down_color,
            linestyle="--",
            marker="o",
        )
        ax2.set_title("BACKTESTING: Real data vs Prediction", fontsize=12)
        ax2.set_xlim(last_day, df_pred.index[-1])
        ax2.set_ylabel("Value")
        ax2.legend(["Real data", "Prediction data"])

        ax3.axhline(y=0, linestyle="--", color=theme.up_color)
        ax3.plot(
            df_future.index,
            100 * (df_pred.values - df_future.values) / df_future.values,
            color=theme.down_color,
            marker="o",
        )
        ax3.set_title(
            "BACKTESTING: Error between Real data and Prediction [%]", fontsize=12
        )
        ax3.plot(
            [last_day, df_future.index[0]],
            [
                0,
                100
                * (df_pred.values[0] - df_future.values[0])
                / df_future.values[0],
            ],
            linestyle="--",
            color=theme.down_color,
        )
        ax3.set_xlim(last_day, df_pred.index[-1])
        ax3.set_xlabel("Time")
        ax3.set_ylabel("Error (%)")
        ax3.legend(["Real data", "Prediction data"])

        theme.style_primary_axis(ax2)
        theme.style_primary_axis(ax3)

        if external_axes is None:
            theme.visualize_output()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future

        if rich_config.USE_COLOR:
            patch_pandas_text_adjustment()

            console.print("Time Real [$] x Prediction [$]")
            console.print(
                df_pred.apply(
                    lambda_price_prediction_backtesting_color, axis=1
                ).to_string()
            )
        else:
            console.print(df_pred[["Real", "Prediction"]].round(2).to_string())

        console.print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, last_value)
    console.print("")
def display_arima(
    dataset: str,
    values: Union[pd.DataFrame, pd.Series],
    arima_order: str,
    n_predict: int,
    seasonal: bool,
    ic: str,
    results: bool,
    s_end_date: str = "",
    export: str = "",
):
    """View fit ARIMA model

    When ``s_end_date`` is given the data is cut at that date and the forecast
    is compared against the values that actually followed (backtesting).
    Returns early, after printing a message, when the backtesting window
    reaches into the future.

    Parameters
    ----------
    dataset : str
        String indicating dataset (for plot title)
    values : Union[pd.DataFrame, pd.Series]
        Data to fit
    arima_order : str
        String of ARIMA params in form "p,d,q"
    n_predict : int
        Days to predict
    seasonal : bool
        Flag to use seasonal model
    ic : str
        Information Criteria for model evaluation
    results : bool
        Flag to display model summary
    s_end_date : str, optional
        Specified end date for backtesting comparisons
    export : str, optional
        Format to export image
    """
    if arima_order:
        # Only used for the plot title; the model receives the raw string.
        t_order = tuple(int(term) for term in arima_order.split(","))

    if s_end_date:
        future_index = get_next_stock_market_days(
            last_stock_day=s_end_date, n_next_days=n_predict
        )

        if future_index[-1] > datetime.datetime.now():
            print(
                "Backtesting not allowed, since End Date + Prediction days is in the future\n"
            )
            return

        df_future = values[future_index[0] : future_index[-1]]
        values = values[:s_end_date]  # type: ignore

    l_predictions, model = arima_model.get_arima_model(
        values, arima_order, n_predict, seasonal, ic
    )

    # Prediction data
    l_pred_days = get_next_stock_market_days(
        last_stock_day=values.index[-1],
        n_next_days=n_predict,
    )
    df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

    if results:
        print(model.summary())
        print("")

    last_day = values.index[-1]
    last_value = values.values[-1]

    # Plotting
    fig, ax = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    ax.plot(values.index, values, lw=2)

    # pylint:disable=no-member
    order_label = str(t_order) if arima_order else model.order
    title_prefix = "BACKTESTING: " if s_end_date else ""
    ax.set_title(
        f"{title_prefix}ARIMA {order_label} on {dataset} - {n_predict} days prediction"
    )
    ax.set_xlim(values.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1])
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    # Visibility is passed positionally: the `b` keyword was renamed to
    # `visible` in newer Matplotlib, positional works on every version.
    ax.grid(True, which="major", color="#666666", linestyle="-")
    ax.minorticks_on()
    ax.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
    # Dashed connector between the last known value and the first forecast
    ax.plot(
        [last_day, df_pred.index[0]],
        [last_value, df_pred.values[0]],
        lw=1,
        c="tab:green",
        linestyle="--",
    )
    ax.plot(df_pred.index, df_pred, lw=2, c="tab:green")
    ax.axvspan(last_day, df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
    # Read the limits from the axis being drawn on, not from pyplot state.
    ymin, ymax = ax.get_ylim()
    ax.vlines(last_day, ymin, ymax, linewidth=1, linestyle="--", color="k")

    # BACKTESTING: overlay the real values of the forecast window
    if s_end_date:
        ax.plot(
            df_future.index,
            df_future.values,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax.plot(
            [last_day, df_future.index[0]],
            [last_value, df_future.values[0]],
            lw=1,
            c="tab:blue",
            linestyle="--",
        )
    fig.tight_layout()
    if gtff.USE_ION:
        plt.ion()

    plt.show()

    # BACKTESTING: second figure with real-vs-predicted and the % error
    if s_end_date:
        x_right = df_pred.index[-1] + datetime.timedelta(days=1)
        pct_error = 100 * (df_pred.values - df_future.values) / df_future.values

        fig, ax = plt.subplots(1, 2, figsize=plot_autoscale(), dpi=PLOT_DPI)
        ax0 = ax[0]
        ax0.plot(
            df_future.index,
            df_future.values,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.plot(df_pred.index, df_pred, lw=2, c="green")
        ax0.scatter(df_future.index, df_future, c="tab:blue", lw=3)
        ax0.plot(
            [last_day, df_future.index[0]],
            [last_value, df_future.values[0]],
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.scatter(df_pred.index, df_pred, c="green", lw=3)
        ax0.plot(
            [last_day, df_pred.index[0]],
            [last_value, df_pred.values[0]],
            lw=2,
            c="green",
            ls="--",
        )
        ax0.set_title("BACKTESTING: Real data Prediction")
        ax0.set_xlim(last_day, x_right)
        ax0.set_xticks([last_day, x_right])
        ax0.set_ylabel("Value")
        ax0.grid(True, which="major", color="#666666", linestyle="-")
        ax0.minorticks_on()
        ax0.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        ax0.legend(["Real data", "Prediction data"])
        ax0.set_xticks([])

        ax1 = ax[1]
        ax1.axhline(y=0, color="k", linestyle="--", linewidth=2)
        ax1.plot(df_future.index, pct_error, lw=2, c="red")
        ax1.scatter(df_future.index, pct_error, c="red", lw=5)
        ax1.set_title("BACKTESTING: Error between Real data and Prediction [%]")
        ax1.plot(
            [last_day, df_future.index[0]],
            [
                0,
                100
                * (df_pred.values[0] - df_future.values[0])
                / df_future.values[0],
            ],
            lw=2,
            ls="--",
            c="red",
        )
        ax1.set_xlim(last_day, x_right)
        ax1.set_xticks([last_day, x_right])
        ax1.set_xlabel("Time")
        ax1.set_ylabel("Prediction Error (%)")
        ax1.grid(True, which="major", color="#666666", linestyle="-")
        ax1.minorticks_on()
        ax1.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        ax1.legend(["Real data", "Prediction data"])
        fig.tight_layout()
        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future.values

        if gtff.USE_TABULATE_DF:
            # The tabulated output is the same with or without colour, so it
            # is handled once. The frame's column is named "Prediction";
            # "Predicted" is only the header label shown in the table.
            df_pred["Real"] = df_pred["Real"].astype(float)
            df_pred["Prediction"] = df_pred["Prediction"].astype(float)
            df_pred["Dif"] = 100 * (df_pred.Prediction - df_pred.Real) / df_pred.Real
            print(
                tabulate(
                    df_pred,
                    headers=["Date", "Predicted", "Actual", "% Difference"],
                    showindex=True,
                    floatfmt=".2f",
                    tablefmt="fancy_grid",
                )
            )
        elif gtff.USE_COLOR:
            patch_pandas_text_adjustment()

            print("Time Real [$] x Prediction [$]")
            print(
                df_pred.apply(price_prediction_backtesting_color, axis=1).to_string()
            )
        else:
            print(df_pred[["Real", "Prediction"]].round(2).to_string())

        print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, last_value)
    export_data(export, os.path.dirname(os.path.abspath(__file__)), "arima")
    print("")
def display_regression(
    dataset: str,
    values: Union[pd.Series, pd.DataFrame],
    poly_order: int,
    n_input: int,
    n_predict: int,
    n_jumps: int,
    s_end_date: str = "",
    export: str = "",
    time_res: str = "",
):
    """Display predictions for regression models

    When ``s_end_date`` is given the data is cut at that date and the
    prediction is compared against the values that actually followed
    (backtesting); a second figure with the comparison is then shown.

    Parameters
    ----------
    dataset : str
        Title for data
    values : Union[pd.Series, pd.DataFrame]
        Data to fit
    poly_order : int
        Order of polynomial to fit
    n_input : int
        Length of input sequence
    n_predict : int
        Length of prediction sequence
    n_jumps : int
        Number of jumps in data
    s_end_date : str, optional
        End date of the data used for fitting; enables backtesting
    export : str, optional
        Format for exporting figures
    time_res : str
        Resolution for data, allowing for predicting outside of standard market days
    """
    # BACKTESTING
    if s_end_date:
        if not time_res:
            future_index = get_next_stock_market_days(
                last_stock_day=s_end_date, n_next_days=n_predict
            )
        else:
            # date_range includes s_end_date itself, hence one extra period
            # that is dropped again with [1:].
            future_index = pd.date_range(
                s_end_date, periods=n_predict + 1, freq=time_res
            )[1:]

        df_future = values[future_index[0] : future_index[-1]]
        values = values[:s_end_date]  # type: ignore

    l_predictions, _ = regression_model.get_regression_model(
        values, poly_order, n_input, n_predict, n_jumps
    )

    # Prediction data
    if not time_res:
        l_pred_days = get_next_stock_market_days(
            last_stock_day=values.index[-1],
            n_next_days=n_predict,
        )
    else:
        l_pred_days = pd.date_range(
            values.index[-1], periods=n_predict + 1, freq=time_res
        )[1:]
    df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

    last_day = values.index[-1]
    last_value = values.values[-1]

    # Plotting
    fig, ax = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    ax.plot(values.index, values, lw=2)
    # BACKTESTING
    if s_end_date:
        ax.set_title(
            f"BACKTESTING: Regression (polynomial {poly_order}) on {dataset} - {n_predict} step prediction"
        )
    else:
        ax.set_title(
            f"Regression (polynomial {poly_order}) on {dataset} - {n_predict} step prediction"
        )
    ax.set_xlim(values.index[0], l_pred_days[-1])
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    # Visibility is passed positionally: the `b` keyword was renamed to
    # `visible` in newer Matplotlib, positional works on every version.
    ax.grid(True, which="major", color="#666666", linestyle="-")
    ax.minorticks_on()
    ax.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
    # Dashed connector between the last known value and the first prediction
    ax.plot(
        [last_day, df_pred.index[0]],
        [last_value, df_pred.values[0]],
        lw=1,
        c="tab:green",
        linestyle="--",
    )
    ax.plot(df_pred.index, df_pred, lw=2, c="tab:green")
    ax.axvspan(last_day, df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
    # Read the limits from the axis being drawn on, not from pyplot state.
    ymin, ymax = ax.get_ylim()
    ax.vlines(last_day, ymin, ymax, linewidth=1, linestyle="--", color="k")

    # BACKTESTING: overlay the real values of the prediction window
    if s_end_date:
        ax.plot(
            df_future.index,
            df_future,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax.plot(
            [last_day, df_future.index[0]],
            [last_value, df_future.values[0]],
            lw=1,
            c="tab:blue",
            linestyle="--",
        )
    fig.tight_layout()
    if gtff.USE_ION:
        plt.ion()

    plt.show()
    export_data(export, os.path.dirname(os.path.abspath(__file__)), "regression")
    console.print("")

    # BACKTESTING: second figure with real-vs-predicted and the % error
    if s_end_date:
        pct_error = 100 * (df_pred.values - df_future.values) / df_future.values

        fig, ax = plt.subplots(1, 2, figsize=plot_autoscale(), dpi=PLOT_DPI)
        ax0 = ax[0]
        ax0.plot(
            df_future.index,
            df_future,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.plot(df_pred.index, df_pred, lw=2, c="green")
        ax0.scatter(df_future.index, df_future, c="tab:blue", lw=3)
        ax0.plot(
            [last_day, df_future.index[0]],
            [last_value, df_future.values[0]],
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.scatter(df_pred.index, df_pred, c="green", lw=3)
        ax0.plot(
            [last_day, df_pred.index[0]],
            [last_value, df_pred.values[0]],
            lw=2,
            c="green",
            ls="--",
        )
        ax0.set_title("BACKTESTING: Real data vs Prediction")
        ax0.set_xlim(last_day, df_pred.index[-1])
        ax0.set_xticks([last_day, df_pred.index[-1]])
        ax0.set_ylabel("Value")
        ax0.grid(True, which="major", color="#666666", linestyle="-")
        ax0.minorticks_on()
        ax0.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        ax0.legend(["Real data", "Prediction data"])
        ax0.set_xticks([])

        ax1 = ax[1]
        ax1.axhline(y=0, color="k", linestyle="--", linewidth=2)
        ax1.plot(df_future.index, pct_error, lw=2, c="red")
        ax1.scatter(df_future.index, pct_error, c="red", lw=5)
        ax1.set_title("BACKTESTING: Error between Real data and Prediction [%]")
        ax1.plot(
            [last_day, df_future.index[0]],
            [
                0,
                100
                * (df_pred.values[0] - df_future.values[0])
                / df_future.values[0],
            ],
            lw=2,
            ls="--",
            c="red",
        )
        ax1.set_xlim(last_day, df_pred.index[-1])
        ax1.set_xticks([last_day, df_pred.index[-1]])
        ax1.set_xlabel("Time")
        ax1.set_ylabel("Prediction Error (%)")
        ax1.grid(True, which="major", color="#666666", linestyle="-")
        ax1.minorticks_on()
        ax1.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        ax1.legend(["Real data", "Prediction data"])
        fig.tight_layout()
        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future

        if gtff.USE_COLOR:
            patch_pandas_text_adjustment()

            console.print("Time Real [$] x Prediction [$]")
            console.print(
                df_pred.apply(price_prediction_backtesting_color, axis=1).to_string()
            )
        else:
            console.print(df_pred[["Real", "Prediction"]].round(2).to_string())

        console.print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, last_value)
    console.print("")
def display_arima(
    dataset: str,
    values: Union[pd.DataFrame, pd.Series],
    arima_order: str,
    n_predict: int,
    seasonal: bool,
    ic: str,
    results: bool,
    s_end_date: str = "",
    export: str = "",
    time_res: str = "",
    external_axes: Optional[List[plt.Axes]] = None,
):
    """View fit ARIMA model

    Fits the model through ``arima_model.get_arima_model``, plots the history
    together with the forecast and, when ``s_end_date`` is given, additionally
    plots and tabulates the forecast against the data that followed that date.

    Parameters
    ----------
    dataset : str
        String indicating dataset (for plot title)
    values : Union[pd.DataFrame, pd.Series]
        Data to fit
    arima_order : str
        String of ARIMA params in form "p,d,q". When empty, the order reported
        by the fitted model is used in the plot title.
    n_predict : int
        Days to predict
    seasonal : bool
        Flag to use seasonal model
    ic : str
        Information Criteria for model evaluation
    results : bool
        Flag to display model summary
    s_end_date : str, optional
        Specified end date for backtesting comparisons
    export : str, optional
        Format to export image
    time_res : str
        Resolution for data, allowing for predicting outside of standard market days
    external_axes : Optional[List[plt.Axes]], optional
        External axes (1 axis is expected in the list, or 3 axes when
        backtesting), by default None
    """
    if arima_order:
        t_order = tuple(int(term) for term in arima_order.split(","))

    # BACKTESTING: hold back the data after the end date for later comparison
    if s_end_date:
        if not time_res:
            future_index = get_next_stock_market_days(
                last_stock_day=s_end_date, n_next_days=n_predict
            )
        else:
            future_index = pd.date_range(
                s_end_date, periods=n_predict + 1, freq=time_res
            )[1:]

        if future_index[-1] > datetime.datetime.now():
            console.print(
                "Backtesting not allowed, since End Date + Prediction days is in the future\n"
            )
            return

        df_future = values[future_index[0] : future_index[-1]]  # noqa: E203
        values = values[:s_end_date]  # type: ignore

    l_predictions, model = arima_model.get_arima_model(
        values, arima_order, n_predict, seasonal, ic
    )

    # Prediction data
    if not time_res:
        l_pred_days = get_next_stock_market_days(
            last_stock_day=values.index[-1],
            n_next_days=n_predict,
        )
    else:
        l_pred_days = pd.date_range(
            values.index[-1], periods=n_predict + 1, freq=time_res
        )[1:]
    df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

    if results:
        console.print(model.summary())
        console.print("")

    # Main plot uses a single axis; backtesting needs two more further down
    if external_axes is None:
        _, ax = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    else:
        expected_axes = 3 if s_end_date else 1
        if len(external_axes) != expected_axes:
            logger.error(
                "Expected list of 1 axis item or 3 axis items when backtesting"
            )
            console.print(
                "[red]Expected list of 1 axis item "
                "or 3 axis items when backtesting.\n[/red]"
            )
            return
        ax = external_axes[0]

    ax.plot(values.index, values)

    # pylint:disable=no-member
    order_label = t_order if arima_order else model.order
    title_prefix = "BACKTESTING: " if s_end_date else ""
    # Always title the axis being drawn on (not whichever axes pyplot
    # considers current), so externally supplied axes are labelled correctly.
    ax.set_title(
        f"{title_prefix}ARIMA {order_label} on {dataset} - {n_predict} step prediction"
    )
    ax.set_xlim(values.index[0], l_pred_days[-1])
    ax.set_ylabel("Value")
    # Dashed connector between the last known value and the first prediction
    ax.plot(
        [values.index[-1], df_pred.index[0]],
        [values.values[-1], df_pred.values[0]],
        color=theme.up_color,
        linestyle="--",
    )
    ax.plot(df_pred.index, df_pred, color=theme.up_color)
    ax.axvspan(values.index[-1], df_pred.index[-1], alpha=0.2)
    # Read the limits from this axis rather than from pyplot's current axes
    _, _, ymin, ymax = ax.axis()
    ax.vlines(values.index[-1], ymin, ymax, linestyle="--")

    # BACKTESTING
    if s_end_date:
        ax.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            color=theme.up_color,
            linestyle="--",
        )

    theme.style_primary_axis(ax)

    if external_axes is None:
        theme.visualize_output()

    # BACKTESTING
    if s_end_date:
        if external_axes is None:
            _, axes = plt.subplots(
                2, 1, sharex=True, figsize=plot_autoscale(), dpi=PLOT_DPI
            )
            (ax2, ax3) = axes
        else:
            # The list length (3) was already validated before the main plot
            (_, ax2, ax3) = external_axes

        # Upper panel: realised values against predicted values
        ax2.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax2.plot(df_pred.index, df_pred)
        ax2.scatter(
            df_future.index,
            df_future,
            color=theme.up_color,
        )
        ax2.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            color=theme.up_color,
            linestyle="--",
        )
        ax2.scatter(df_pred.index, df_pred)
        ax2.plot(
            [values.index[-1], df_pred.index[0]],
            [values.values[-1], df_pred.values[0]],
            linestyle="--",
        )
        ax2.set_title("BACKTESTING: Values")
        ax2.set_xlim(
            values.index[-1],
            df_pred.index[-1] + datetime.timedelta(days=1),
        )
        ax2.set_ylabel("Value")
        ax2.legend(["Real data", "Prediction data"])
        theme.style_primary_axis(ax2)

        # Lower panel: percentage error of the prediction
        pct_error = 100 * (df_pred.values - df_future.values) / df_future.values
        ax3.axhline(y=0, linestyle="--")
        ax3.plot(
            df_future.index,
            pct_error,
            color=theme.down_color,
        )
        ax3.scatter(
            df_future.index,
            pct_error,
            color=theme.down_color,
        )
        ax3.set_title("BACKTESTING: % Error")
        ax3.plot(
            [values.index[-1], df_future.index[0]],
            [
                0,
                100
                * (df_pred.values[0] - df_future.values[0])
                / df_future.values[0],
            ],
            ls="--",
            color=theme.down_color,
        )
        ax3.set_xlim(
            values.index[-1],
            df_pred.index[-1] + datetime.timedelta(days=1),
        )
        ax3.set_ylabel("Prediction Error (%)")
        theme.style_primary_axis(ax3)

        if external_axes is None:
            theme.visualize_output()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future.values
        df_pred["Real"] = df_pred["Real"].astype(float)
        df_pred["Prediction"] = df_pred["Prediction"].astype(float)
        df_pred["Dif"] = 100 * (df_pred.Prediction - df_pred.Real) / df_pred.Real
        # The same table is printed whether or not colour output is enabled.
        # The former non-colour branch read a non-existent "Predicted" column
        # and supplied four headers for the three columns of this frame.
        print_rich_table(
            df_pred,
            headers=["Predicted", "Actual", "% Difference"],
            index_name="Date",
            show_index=True,
            title="ARIMA Model",
        )

        console.print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, values.values[-1])

    export_data(export, os.path.dirname(os.path.abspath(__file__)), "arima")
    console.print("")
def display_exponential_smoothing(
    ticker: str,
    values: Union[pd.DataFrame, pd.Series],
    n_predict: int,
    trend: str = "N",
    seasonal: str = "N",
    seasonal_periods: int = 5,
    s_end_date: str = "",
    export: str = "",
    time_res: str = "",
    external_axes: Optional[List[plt.Axes]] = None,
):
    """Perform exponential smoothing

    Fits the model through ``ets_model.get_exponential_smoothing_model``,
    prints the fit parameters and AIC/BIC/SSE, plots the history together with
    the forecast and, when ``s_end_date`` is given, additionally plots and
    prints the forecast against the data that followed that date.

    Parameters
    ----------
    ticker : str
        Dataset being smoothed
    values : Union[pd.DataFrame, pd.Series]
        Raw data
    n_predict : int
        Days to predict
    trend : str, optional
        Trend variable, by default "N"
    seasonal : str, optional
        Seasonal variable, by default "N"
    seasonal_periods : int, optional
        Number of seasonal periods, by default 5
    s_end_date : str, optional
        End date for backtesting, by default ""
    export : str, optional
        Format to export data, by default ""
    time_res : str
        Resolution for data, allowing for predicting outside of standard market days
    external_axes : Optional[List[plt.Axes]], optional
        External axes (1 axis is expected in the list, or 3 axes when
        backtesting), by default None
    """
    # BACKTESTING: hold back the data after the end date for later comparison
    if s_end_date:
        if not time_res:
            future_index = get_next_stock_market_days(
                last_stock_day=s_end_date, n_next_days=n_predict
            )
        else:
            future_index = pd.date_range(
                s_end_date, periods=n_predict + 1, freq=time_res
            )[1:]

        if future_index[-1] > datetime.datetime.now():
            console.print(
                "Backtesting not allowed,"
                + " since End Date + Prediction days is in the future\n"
            )
            return

        df_future = values[future_index[0] : future_index[-1]]  # noqa: E203
        values = values[:s_end_date]  # type: ignore

    # Get ETS model
    model, title, forecast = ets_model.get_exponential_smoothing_model(
        values, trend, seasonal, seasonal_periods, n_predict
    )

    if not forecast:
        console.print("No forecast made.  Model did not converge.\n")
        return

    if np.isnan(forecast).any():
        console.print("Model predicted NaN values.  Runtime Error.\n")
        return

    if not time_res:
        l_pred_days = get_next_stock_market_days(
            last_stock_day=values.index[-1],
            n_next_days=n_predict,
        )
    else:
        l_pred_days = pd.date_range(
            values.index[-1], periods=n_predict + 1, freq=time_res
        )[1:]

    df_pred = pd.Series(forecast, index=l_pred_days, name="Price")

    console.print(f"\n{title}")
    console.print("\nFit model parameters:")
    for key, value in model.params.items():
        console.print(f"{key} {' '*(18-len(key))}: {value}")

    console.print("\nAssess fit model:")
    console.print(f"AIC: {round(model.aic, 2)}")
    console.print(f"BIC: {round(model.bic, 2)}")
    console.print(f"SSE: {round(model.sse, 2)}\n")

    # Plotting
    # Main plot uses a single axis; backtesting needs two more further down
    if external_axes is None:
        _, ax1 = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    else:
        expected_axes = 3 if s_end_date else 1
        if len(external_axes) != expected_axes:
            console.print(
                "[red]Expected list of 1 axis item "
                "or 3 axis items when backtesting.\n[/red]"
            )
            return
        ax1 = external_axes[0]

    ax1.plot(values.index, values.values)

    # BACKTESTING
    if s_end_date:
        ax1.set_title(f"BACKTESTING: {title} on {ticker}", fontsize=12)
    else:
        ax1.set_title(f"{title} on {ticker}", fontsize=12)

    ax1.set_xlim(
        values.index[0],
        get_next_stock_market_days(df_pred.index[-1], 1)[-1],
    )
    ax1.set_ylabel("Value")
    # Dashed connector between the last known value and the first prediction
    ax1.plot(
        [values.index[-1], df_pred.index[0]],
        [values.values[-1], df_pred.values[0]],
        color=theme.down_color,
        linestyle="--",
    )
    ax1.plot(df_pred.index, df_pred, color=theme.down_color)
    ax1.axvspan(
        values.index[-1],
        df_pred.index[-1],
        facecolor=theme.down_color,
        alpha=0.2,
    )
    # Read the limits from this axis rather than from pyplot's current axes,
    # which may be a different one when external axes are supplied.
    _, _, ymin, ymax = ax1.axis()
    ax1.vlines(
        values.index[-1],
        ymin,
        ymax,
        linestyle="--",
        color=theme.get_colors(reverse=True)[0],
    )

    # BACKTESTING
    if s_end_date:
        ax1.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax1.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            color=theme.up_color,
            linestyle="--",
        )

    theme.style_primary_axis(ax1)

    if external_axes is None:
        theme.visualize_output()

    # BACKTESTING
    if s_end_date:
        if external_axes is None:
            _, axes = plt.subplots(
                2, 1, sharex=True, figsize=plot_autoscale(), dpi=PLOT_DPI
            )
            (ax2, ax3) = axes
        else:
            # The list length (3) was already validated before the main plot
            (_, ax2, ax3) = external_axes

        # Upper panel: realised values against predicted values
        ax2.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax2.plot(df_pred.index, df_pred)
        ax2.scatter(
            df_future.index,
            df_future,
            color=theme.up_color,
        )
        ax2.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            color=theme.up_color,
            linestyle="--",
        )
        ax2.scatter(df_pred.index, df_pred)
        ax2.plot(
            [values.index[-1], df_pred.index[0]],
            [values.values[-1], df_pred.values[0]],
            linestyle="--",
        )
        ax2.set_title("BACKTESTING: Values")
        ax2.set_xlim(
            values.index[-1],
            df_pred.index[-1] + datetime.timedelta(days=1),
        )
        ax2.set_ylabel("Value")
        ax2.legend(["Real data", "Prediction data"])
        theme.style_primary_axis(ax2)

        # Lower panel: percentage error of the prediction
        pct_error = 100 * (df_pred.values - df_future.values) / df_future.values
        ax3.axhline(y=0, linestyle="--")
        ax3.plot(
            df_future.index,
            pct_error,
            color=theme.down_color,
        )
        ax3.scatter(
            df_future.index,
            pct_error,
            color=theme.down_color,
        )
        ax3.set_title("BACKTESTING: % Error")
        ax3.plot(
            [values.index[-1], df_future.index[0]],
            [
                0,
                100
                * (df_pred.values[0] - df_future.values[0])
                / df_future.values[0],
            ],
            ls="--",
            color=theme.down_color,
        )
        ax3.set_xlim(
            values.index[-1],
            df_pred.index[-1] + datetime.timedelta(days=1),
        )
        ax3.set_ylabel("Prediction Error (%)")
        theme.style_primary_axis(ax3)

        if external_axes is None:
            theme.visualize_output()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future

        if gtff.USE_COLOR:
            patch_pandas_text_adjustment()

            console.print("Time         Real [$]  x  Prediction [$]")
            console.print(
                df_pred.apply(
                    lambda_price_prediction_backtesting_color, axis=1
                ).to_string()
            )
        else:
            console.print(df_pred[["Real", "Prediction"]].round(2).to_string())

        console.print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, values.values[-1])

    export_data(export, os.path.dirname(os.path.abspath(__file__)), "ets")
def exponential_smoothing(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """
    Perform exponential smoothing forecasting

    Parses the command arguments, fits the model on the "Adj Close" column,
    prints the fit parameters and AIC/BIC/SSE, then plots and prints the
    forecast. When an end date is given the forecast is also compared with
    the prices that followed that date (backtesting).

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        Loaded ticker
    df_stock: pd.DataFrame
        Loaded stock dataframe
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="ets",
        description="""
            Exponential Smoothing, see https://otexts.com/fpp2/taxonomy.html

            Trend='N',  Seasonal='N': Simple Exponential Smoothing
            Trend='N',  Seasonal='A': Exponential Smoothing
            Trend='N',  Seasonal='M': Exponential Smoothing
            Trend='A',  Seasonal='N': Holt’s linear method
            Trend='A',  Seasonal='A': Additive Holt-Winters’ method
            Trend='A',  Seasonal='M': Multiplicative Holt-Winters’ method
            Trend='Ad', Seasonal='N': Additive damped trend method
            Trend='Ad', Seasonal='A': Exponential Smoothing
            Trend='Ad', Seasonal='M': Holt-Winters’ damped method

            Trend component: N: None, A: Additive, Ad: Additive Damped
            Seasonality component: N: None, A: Additive, M: Multiplicative
        """,
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-t",
        "--trend",
        action="store",
        dest="trend",
        type=check_valid_trend,
        default="N",
        help="Trend component: N: None, A: Additive, Ad: Additive Damped.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store",
        dest="seasonal",
        type=check_valid_seasonal,
        default="N",
        help="Seasonality component: N: None, A: Additive, M: Multiplicative.",
    )
    parser.add_argument(
        "-p",
        "--periods",
        action="store",
        dest="seasonal_periods",
        type=check_positive,
        default=5,
        help="Seasonal periods.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        # BACKTESTING: validate the end date and hold back the later prices
        if ns_parser.s_end_date:

            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            earliest_end_date = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=5 + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end_date:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date, n_next_days=ns_parser.n_days
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            df_future = df_stock[future_index[0] : future_index[-1]]  # noqa: E203
            df_stock = df_stock[: ns_parser.s_end_date]

        # Get ETS model
        model, title = get_exponential_smoothing_model(
            df_stock["Adj Close"].values,
            ns_parser.trend,
            ns_parser.seasonal,
            ns_parser.seasonal_periods,
        )

        if not model.mle_retvals.success:
            print("ConvergenceWarning: Optimization failed to converge.")
            return

        # Negative forecasts are clamped to zero
        forecast = [i if i > 0 else 0 for i in model.forecast(ns_parser.n_days)]

        l_pred_days = get_next_stock_market_days(
            last_stock_day=df_stock["Adj Close"].index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(forecast, index=l_pred_days, name="Price")

        # Logical `not` instead of bitwise `~`: the latter only behaves as a
        # negation while the operand is a NumPy boolean.
        if np.isnan(forecast).any():
            print("RuntimeWarning: invalid value encountered in double_scalars.")
            return

        print(f"\n{title}")
        print("\nFit model parameters:")
        for key, value in model.params.items():
            print(f"{key} {' '*(18-len(key))}: {value}")

        print("\nAssess fit model:")
        print(f"AIC: {round(model.aic, 2)}")
        print(f"BIC: {round(model.bic, 2)}")
        print(f"SSE: {round(model.sse, 2)}\n")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, df_stock["Adj Close"], lw=2)
        # BACKTESTING
        if ns_parser.s_end_date:
            plt.title(f"BACKTESTING: {title} on {s_ticker}")
        else:
            plt.title(f"{title} on {s_ticker}")

        plt.xlim(
            df_stock.index[0],
            get_next_stock_market_days(df_pred.index[-1], 1)[-1],
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally: its keyword was renamed from
        # `b` to `visible` in Matplotlib, so neither spelling is portable.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last known price and the first prediction
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [df_stock["Adj Close"].values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(
            df_stock.index[-1],
            df_pred.index[-1],
            facecolor="tab:orange",
            alpha=0.2,
        )
        _, _, ymin, ymax = plt.axis()
        plt.vlines(
            df_stock.index[-1],
            ymin,
            ymax,
            linewidth=1,
            linestyle="--",
            color="k",
        )

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index,
                df_future["Adj Close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    df_stock["Adj Close"].values[-1],
                    df_future["Adj Close"].values[0],
                ],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        if not ns_parser.s_end_date:
            # Print prediction data
            print_pretty_prediction(df_pred, df_stock["Adj Close"].values[-1])
            print("")
            return

        # BACKTESTING
        real_prices = df_future["Adj Close"]
        pct_error = 100 * (df_pred.values - real_prices.values) / real_prices.values

        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)

        # Upper panel: real prices against predicted prices
        plt.subplot(211)
        plt.plot(
            df_future.index,
            real_prices,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="green")
        plt.scatter(
            df_future.index,
            real_prices,
            c="tab:blue",
            lw=3,
        )
        plt.plot(
            [df_stock.index[-1], df_future.index[0]],
            [
                df_stock["Adj Close"].values[-1],
                real_prices.values[0],
            ],
            lw=2,
            c="tab:blue",
            ls="--",
        )
        plt.scatter(df_pred.index, df_pred, c="green", lw=3)
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [df_stock["Adj Close"].values[-1], df_pred.values[0]],
            lw=2,
            c="green",
            ls="--",
        )
        plt.title("BACKTESTING: Real data price versus Prediction")
        plt.xlim(
            df_stock.index[-1],
            df_pred.index[-1] + datetime.timedelta(days=1),
        )
        plt.ylabel("Share Price ($)")
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        plt.legend(["Real data", "Prediction data"])
        plt.xticks([])

        # Lower panel: percentage error of the prediction
        plt.subplot(212)
        plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
        plt.plot(
            df_future.index,
            pct_error,
            lw=2,
            c="red",
        )
        plt.scatter(
            df_future.index,
            pct_error,
            c="red",
            lw=5,
        )
        plt.title("BACKTESTING: Error between Real data and Prediction [%]")
        plt.plot(
            [df_stock.index[-1], df_future.index[0]],
            [
                0,
                100
                * (df_pred.values[0] - real_prices.values[0])
                / real_prices.values[0],
            ],
            lw=2,
            ls="--",
            c="red",
        )
        plt.xlim(
            df_stock.index[-1],
            df_pred.index[-1] + datetime.timedelta(days=1),
        )
        plt.xlabel("Time")
        plt.ylabel("Prediction Error (%)")
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        plt.legend(["Real data", "Prediction data"])

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future["Adj Close"]

        if gtff.USE_COLOR:
            patch_pandas_text_adjustment()

            print("Time         Real [$]  x  Prediction [$]")
            print(
                df_pred.apply(price_prediction_backtesting_color, axis=1).to_string()
            )
        else:
            print(df_pred[["Real", "Prediction"]].round(2).to_string())

        print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    except Exception as e:
        # Command boundary: report the problem instead of crashing the caller
        print(e)
        print("")
def regression(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame, polynomial: int):
    """
    Train a regression model

    Parses the command arguments, fits a linear or polynomial (ridge)
    regression on windows of the "Adj Close" column, then plots and prints the
    predicted prices. When an end date is given the prediction is also
    compared with the prices that followed that date (backtesting).

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        Stock ticker
    df_stock: pd.DataFrame
        Dataframe of stock prices
    polynomial: int
        Order of polynomial. ``LINEAR`` selects plain linear regression and
        ``USER_INPUT`` makes the order a required ``-p/--polynomial`` argument.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="regression",
        description="""
            Regression attempts to model the relationship between
            two variables by fitting a linear/quadratic/cubic/other equation to
            observed data. One variable is considered to be an explanatory variable,
            and the other is considered to be a dependent variable.
        """,
    )

    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    if polynomial == USER_INPUT:
        parser.add_argument(
            "-p",
            "--polynomial",
            action="store",
            dest="n_polynomial",
            type=check_positive,
            required=True,
            help="polynomial associated with regression.",
        )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        # BACKTESTING: validate the end date and hold back the later prices
        if ns_parser.s_end_date:

            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            earliest_end_date = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=ns_parser.n_inputs + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end_date:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date, n_next_days=ns_parser.n_days
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            df_future = df_stock[future_index[0] : future_index[-1]]  # noqa: E203
            df_stock = df_stock[: ns_parser.s_end_date]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            df_stock["Adj Close"].values,
            ns_parser.n_inputs,
            ns_parser.n_days,
            ns_parser.n_jumps,
        )

        if not stock_x:
            print("Given the model parameters more training data is needed.\n")
            return

        # Machine Learning model
        if polynomial == LINEAR:
            model = linear_model.LinearRegression(n_jobs=-1)
        else:
            if polynomial == USER_INPUT:
                polynomial = ns_parser.n_polynomial
            model = pipeline.make_pipeline(
                preprocessing.PolynomialFeatures(polynomial), linear_model.Ridge()
            )

        model.fit(stock_x, stock_y)
        # Predict from the most recent input window; negatives are clamped to zero
        latest_window = df_stock["Adj Close"].values[-ns_parser.n_inputs :].reshape(
            1, -1
        )
        l_predictions = [i if i > 0 else 0 for i in model.predict(latest_window)[0]]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=df_stock["Adj Close"].index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, df_stock["Adj Close"], lw=2)
        # BACKTESTING
        if ns_parser.s_end_date:
            plt.title(
                f"BACKTESTING: Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        plt.xlim(
            df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1]
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally: its keyword was renamed from
        # `b` to `visible` in Matplotlib, so neither spelling is portable.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last known price and the first prediction
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [df_stock["Adj Close"].values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(
            df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2
        )
        _, _, ymin, ymax = plt.axis()
        plt.vlines(
            df_stock.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k"
        )

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index,
                df_future["Adj Close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    df_stock["Adj Close"].values[-1],
                    df_future["Adj Close"].values[0],
                ],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        if not ns_parser.s_end_date:
            # Print prediction data
            print_pretty_prediction(df_pred, df_stock["Adj Close"].values[-1])
            print("")
            return

        # BACKTESTING
        real_prices = df_future["Adj Close"]
        pct_error = 100 * (df_pred.values - real_prices.values) / real_prices.values
        backtest_ticks = [
            df_stock.index[-1],
            df_pred.index[-1] + datetime.timedelta(days=1),
        ]

        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)

        # Upper panel: real prices against predicted prices
        plt.subplot(211)
        plt.plot(
            df_future.index,
            real_prices,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="green")
        plt.scatter(df_future.index, real_prices, c="tab:blue", lw=3)
        plt.plot(
            [df_stock.index[-1], df_future.index[0]],
            [
                df_stock["Adj Close"].values[-1],
                real_prices.values[0],
            ],
            lw=2,
            c="tab:blue",
            ls="--",
        )
        plt.scatter(df_pred.index, df_pred, c="green", lw=3)
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [df_stock["Adj Close"].values[-1], df_pred.values[0]],
            lw=2,
            c="green",
            ls="--",
        )
        plt.title("BACKTESTING: Real data price versus Prediction")
        plt.xlim(backtest_ticks[0], backtest_ticks[1])
        plt.xticks(backtest_ticks, visible=True)
        plt.ylabel("Share Price ($)")
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        plt.legend(["Real data", "Prediction data"])
        # The upper panel ends up without x ticks; the lower panel carries them
        plt.xticks([])

        # Lower panel: percentage error of the prediction
        plt.subplot(212)
        plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
        plt.plot(
            df_future.index,
            pct_error,
            lw=2,
            c="red",
        )
        plt.scatter(
            df_future.index,
            pct_error,
            c="red",
            lw=5,
        )
        plt.title("BACKTESTING: Error between Real data and Prediction [%]")
        plt.plot(
            [df_stock.index[-1], df_future.index[0]],
            [
                0,
                100
                * (df_pred.values[0] - real_prices.values[0])
                / real_prices.values[0],
            ],
            lw=2,
            ls="--",
            c="red",
        )
        plt.xlim(backtest_ticks[0], backtest_ticks[1])
        plt.xticks(backtest_ticks, visible=True)
        plt.xlabel("Time")
        plt.ylabel("Prediction Error (%)")
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        plt.legend(["Real data", "Prediction data"])

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future["Adj Close"]

        if gtff.USE_COLOR:
            patch_pandas_text_adjustment()

            print("Time         Real [$]  x  Prediction [$]")
            print(
                df_pred.apply(price_prediction_backtesting_color, axis=1).to_string()
            )
        else:
            print(df_pred[["Real", "Prediction"]].round(2).to_string())

        print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    except SystemExit:
        # Raised by argparse on invalid arguments; keep the caller running
        print("")
    except Exception as e:
        # Command boundary: report the problem instead of crashing the caller
        print(e)
        print("")