def conv1d(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """
    Train a 1D Convolutional Neural Net (1D CNN) and forecast with it

    The network is trained ``n_loops`` times on the adjusted close series.
    Validation predictions are summarised by their median across loops and,
    when more than one loop is run, so is the forecast.

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        Stock ticker (used in the plot title)
    df_stock: pd.DataFrame
        Dataframe of stock prices; the "5. adjusted close" column is used
    """
    try:
        ns_parser = parse_args(
            prog="conv1d",
            description="""1D CNN.""",
            other_args=other_args,
        )
        if not ns_parser:
            return

        prepared = prepare_scale_train_valid_test(
            df_stock["5. adjusted close"], ns_parser
        )
        (
            X_train,
            X_valid,
            y_train,
            y_valid,
            _,
            _,
            _,
            y_dates_valid,
            forecast_data_input,
            dates_forecast_input,
            scaler,
            is_error,
        ) = prepared
        if is_error:
            return

        n_train, train_len = X_train.shape[0], X_train.shape[1]
        n_valid, valid_len = X_valid.shape[0], X_valid.shape[1]
        summary = (
            f"Training on {n_train} sequences of length {train_len}. Using {n_valid} sequences "
            f" of length {valid_len} for validation. Model will run {ns_parser.n_loops} loops"
        )
        print(summary)

        future_dates = get_next_stock_market_days(
            dates_forecast_input[-1], n_next_days=ns_parser.n_days
        )

        # One slot per training loop
        preds = np.zeros((ns_parser.n_loops, n_valid, ns_parser.n_days))
        forecast_data = np.zeros((ns_parser.n_loops, ns_parser.n_days))

        # The network consumes (samples, steps, channels); add the channel axis once
        train_input = X_train.reshape(n_train, train_len, 1)
        valid_input = X_valid.reshape(n_valid, valid_len, 1)

        for loop_idx in range(ns_parser.n_loops):
            # Build Neural Network model
            model = build_neural_network_model(
                cfg_nn_models.Convolutional,
                ns_parser.n_inputs,
                ns_parser.n_days,
            )
            optimizer = optimizers[cfg_nn_models.Optimizer](lr=ns_parser.lr)
            model.compile(optimizer=optimizer, loss=cfg_nn_models.Loss)

            model.fit(
                train_input,
                y_train,
                epochs=ns_parser.n_epochs,
                verbose=True,
                batch_size=ns_parser.n_batch_size,
                validation_data=(valid_input, y_valid),
                callbacks=[es],
            )

            valid_prediction = model.predict(valid_input)
            preds[loop_idx] = valid_prediction.reshape(n_valid, ns_parser.n_days)
            forecast_data[loop_idx] = forecast(
                forecast_data_input, future_dates, model, scaler
            ).values.flat

        # One column per loop, one row per forecast date
        forecast_data_df = pd.DataFrame(forecast_data.T, index=future_dates)
        if ns_parser.n_loops > 1:
            forecast_data_df["Median"] = forecast_data_df.median(axis=1)
            headline = forecast_data_df["Median"]
        else:
            headline = forecast_data_df[0]
        print_pretty_prediction(headline, df_stock["5. adjusted close"].values[-1])

        plot_data_predictions(
            df_stock,
            np.median(preds, axis=0),
            y_valid,
            y_dates_valid,
            scaler,
            f"Conv1D Model on {s_ticker}",
            forecast_data_df,
            ns_parser.n_loops,
        )
        print("")

    except Exception as e:
        print(e)
        traceback.print_exc()
        print("")

    finally:
        # Runs on every exit path, including the early returns above
        restore_env()
def arima(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """
    ARIMA prediction

    Fits either the ARIMA(p,d,q) order given with ``-o`` or an order selected
    by ``pmdarima.auto_arima``, plots the forecast and prints it. When an end
    date is given (``-e``) the history is cut at that date and the forecast is
    compared with the prices that actually followed (backtesting).

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        ticker
    df_stock: pd.DataFrame
        Dataframe of prices. The "5. adjusted close" column is modelled and the
        index is compared against the parsed end date.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="arima",
        description="""
            In statistics and econometrics, and in particular in time series analysis, an
            autoregressive integrated moving average (ARIMA) model is a generalization of an
            autoregressive moving average (ARMA) model. Both of these models are fitted to time
            series data either to better understand the data or to predict future points in the
            series (forecasting). ARIMA(p,d,q) where parameters p, d, and q are non-negative
            integers, p is the order (number of time lags) of the autoregressive model, d is the
            degree of differencing (the number of times the data have had past values subtracted),
            and q is the order of the moving-average model.
        """,
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i",
        "--ic",
        action="store",
        dest="s_ic",
        type=str,
        default="aic",
        choices=["aic", "aicc", "bic", "hqic", "oob"],
        help="information criteria.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store_true",
        default=False,
        dest="b_seasonal",
        help="Use weekly seasonal data.",
    )
    parser.add_argument(
        "-o",
        "--order",
        action="store",
        dest="s_order",
        type=str,
        help="arima model order (p,d,q) in format: p,d,q.",
    )
    parser.add_argument(
        "-r",
        "--results",
        action="store_true",
        dest="b_results",
        default=False,
        help="results about ARIMA summary flag.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        # BACKTESTING
        if ns_parser.s_end_date:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            # Require at least 5 + n_days market days of history before the end date
            earliest_end = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=5 + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date, n_next_days=ns_parser.n_days
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Keep the real prices of the forecast window, then cut the history
            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        close = df_stock["5. adjusted close"]

        # Machine Learning model
        if ns_parser.s_order:
            # "p,d,q" -> (p, d, q)
            t_order = tuple(int(term) for term in ns_parser.s_order.split(","))
            model = ARIMA(close.values, order=t_order).fit()
            l_predictions = model.predict(
                start=len(close) + 1,
                end=len(close) + ns_parser.n_days,
            )
        else:
            # The keyword is `information_criterion`; the previous spelling
            # (`information_criteria`) was not the model-selection parameter,
            # so the user's -i/--ic choice never took effect.
            if ns_parser.b_seasonal:
                model = pmdarima.auto_arima(
                    close.values,
                    error_action="ignore",
                    seasonal=True,
                    m=5,
                    information_criterion=ns_parser.s_ic,
                )
            else:
                model = pmdarima.auto_arima(
                    close.values,
                    error_action="ignore",
                    seasonal=False,
                    information_criterion=ns_parser.s_ic,
                )
            # Negative forecasts are floored at zero
            l_predictions = [
                i if i > 0 else 0 for i in model.predict(n_periods=ns_parser.n_days)
            ]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=close.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        if ns_parser.b_results:
            print(model.summary())
            print("")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, close, lw=2)

        order_label = str(t_order) if ns_parser.s_order else model.order
        s_title = (
            f"ARIMA {order_label} on {s_ticker} - {ns_parser.n_days} days prediction"
        )
        # BACKTESTING
        if ns_parser.s_end_date:
            s_title = f"BACKTESTING: {s_title}"
        plt.title(s_title)

        plt.xlim(
            df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1]
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # Visibility is passed positionally: the keyword was renamed from `b`
        # to `visible` in Matplotlib 3.5, so neither spelling is portable.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last known price and the first prediction
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [close.values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(
            df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2
        )
        _, _, ymin, ymax = plt.axis()
        plt.vlines(
            df_stock.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k"
        )

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    close.values[-1],
                    df_future["5. adjusted close"].values[0],
                ],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if ns_parser.s_end_date:
            real = df_future["5. adjusted close"]
            window_end = df_pred.index[-1] + datetime.timedelta(days=1)

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            plt.plot(df_future.index, real, lw=2, c="tab:blue", ls="--")
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, real, c="tab:blue", lw=3)
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [close.values[-1], real.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [df_stock.index[-1], df_pred.index[0]],
                [close.values[-1], df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(df_stock.index[-1], window_end)
            plt.xticks([df_stock.index[-1], window_end], visible=True)
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            # Signed prediction error relative to the real price, in percent
            pct_error = 100 * (df_pred.values - real.values) / real.values
            plt.plot(df_future.index, pct_error, lw=2, c="red")
            plt.scatter(df_future.index, pct_error, c="red", lw=5)
            plt.title("BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [0, pct_error[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(df_stock.index[-1], window_end)
            plt.xticks([df_stock.index[-1], window_end], visible=True)
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = real

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time Real [$] x Prediction [$]")
                print(
                    df_pred.apply(
                        price_prediction_backtesting_color, axis=1
                    ).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, close.values[-1])
        print("")

    except Exception as e:
        print(e, "\n")
def k_nearest_neighbors(l_args, s_ticker, df_stock):
    """
    Predict prices with a k-nearest-neighbors regressor

    Windows of ``n_inputs`` past adjusted closes are mapped to the ``n_days``
    prices that followed; the most recent window is then used to forecast.
    When an end date is given (``-e``) the history is cut at that date and the
    forecast is compared with the prices that actually followed (backtesting).

    Parameters
    ----------
    l_args: list of str
        Argparse arguments
    s_ticker: str
        Ticker (used in the plot title)
    df_stock: pd.DataFrame
        Dataframe of prices. The "5. adjusted close" column is used and the
        index is compared against the parsed end date.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="knn",
        description="""
            K nearest neighbors is a simple algorithm that stores all
            available cases and predict the numerical target based on a similarity measure
            (e.g. distance functions).
        """,
    )

    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-n",
        "--neighbors",
        action="store",
        dest="n_neighbors",
        type=check_positive,
        default=20,
        help="number of neighbors to use on the algorithm.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        # BACKTESTING
        if ns_parser.s_end_date:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            # Require at least one full input + output window before the end date
            earliest_end = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=ns_parser.n_inputs + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date, n_next_days=ns_parser.n_days
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Keep the real prices of the forecast window, then cut the history
            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        close = df_stock["5. adjusted close"]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            close.values,
            ns_parser.n_inputs,
            ns_parser.n_days,
            ns_parser.n_jumps,
        )

        if not stock_x:
            print("Given the model parameters more training data is needed.\n")
            return

        # Machine Learning model
        knn = neighbors.KNeighborsRegressor(n_neighbors=ns_parser.n_neighbors)
        knn.fit(stock_x, stock_y)

        # Prediction data: feed the latest n_inputs prices as a single sample
        latest_window = close.values[-ns_parser.n_inputs:].reshape(1, -1)
        l_predictions = knn.predict(latest_window)[0]
        l_pred_days = get_next_stock_market_days(
            last_stock_day=close.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, close, lw=2)
        s_knn = f"{ns_parser.n_neighbors}-Nearest Neighbors on {s_ticker}"
        # BACKTESTING
        if ns_parser.s_end_date:
            plt.title(f"BACKTESTING: {s_knn} - {ns_parser.n_days} days prediction")
        else:
            plt.title(f"{s_knn} - {ns_parser.n_days} days prediction")
        plt.xlim(
            df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1]
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # Visibility is passed positionally: the keyword was renamed from `b`
        # to `visible` in Matplotlib 3.5, so neither spelling is portable.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last known price and the first prediction
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [close.values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(
            df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2
        )
        _, _, ymin, ymax = plt.axis()
        plt.vlines(
            df_stock.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k"
        )

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    close.values[-1],
                    df_future["5. adjusted close"].values[0],
                ],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if ns_parser.s_end_date:
            real = df_future["5. adjusted close"]
            window_end = df_pred.index[-1] + datetime.timedelta(days=1)

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            plt.plot(df_future.index, real, lw=2, c="tab:blue", ls="--")
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, real, c="tab:blue", lw=3)
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [close.values[-1], real.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [df_stock.index[-1], df_pred.index[0]],
                [close.values[-1], df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(df_stock.index[-1], window_end)
            plt.xticks([df_stock.index[-1], window_end])
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            # Signed prediction error relative to the real price, in percent
            pct_error = 100 * (df_pred.values - real.values) / real.values
            plt.plot(df_future.index, pct_error, lw=2, c="red")
            plt.scatter(df_future.index, pct_error, c="red", lw=5)
            plt.title("BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [0, pct_error[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(df_stock.index[-1], window_end)
            plt.xticks([df_stock.index[-1], window_end])
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = real

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time Real [$] x Prediction [$]")
                print(
                    df_pred.apply(
                        price_prediction_backtesting_color, axis=1
                    ).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, close.values[-1])
        print("")

    except Exception as e:
        print(e)
        print("")
def k_nearest_neighbors(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """
    Train KNN model

    Fits a k-nearest-neighbors regressor on the prepared training windows,
    predicts the validation windows and the forecast window, then prints and
    plots the result.

    Parameters
    ----------
    other_args: List[str]
        List of argparse arguments
    s_ticker: str
        Ticker
    df_stock: pd.DataFrame
        Dataframe of stock prices; the "5. adjusted close" column is used

    Returns
    -------
    None
        Results are printed and plotted only.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="knn",
        description="""
            K nearest neighbors is a simple algorithm that stores all
            available cases and predict the numerical target based on a similarity measure
            (e.g. distance functions).
        """,
    )
    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use as input for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-n",
        "--neighbors",
        action="store",
        dest="n_neighbors",
        type=check_positive,
        default=20,
        help="number of neighbors to use on the algorithm.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select for testing",
    )
    parser.add_argument(
        "-t",
        "--test_size",
        default=0.2,
        dest="valid_split",
        type=float,
        help="Percentage of data to validate in sample",
    )
    parser.add_argument(
        "-p",
        "--pp",
        action="store",
        dest="s_preprocessing",
        default="none",
        choices=["normalization", "standardization", "minmax", "none"],
        help="pre-processing data.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        prepared = prepare_scale_train_valid_test(
            df_stock["5. adjusted close"], ns_parser
        )
        (
            X_train,
            X_valid,
            y_train,
            y_valid,
            _,
            _,
            _,
            y_dates_valid,
            forecast_data_input,
            dates_forecast_input,
            scaler,
            is_error,
        ) = prepared
        if is_error:
            print("Error preparing data")
            return

        n_train, train_len = X_train.shape[0], X_train.shape[1]
        n_valid, valid_len = X_valid.shape[0], X_valid.shape[1]
        summary = (
            f"Training on {n_train} sequences of length {train_len}. Using {n_valid} sequences "
            f" of length {valid_len} for validation"
        )
        print(summary)

        future_dates = get_next_stock_market_days(
            dates_forecast_input[-1], n_next_days=ns_parser.n_days
        )

        # Machine Learning model: the regressor is fed 2-D (samples, features) arrays
        features_train = X_train.reshape(n_train, train_len)
        targets_train = y_train.reshape(y_train.shape[0], y_train.shape[1])
        features_valid = X_valid.reshape(n_valid, valid_len)

        knn = neighbors.KNeighborsRegressor(n_neighbors=ns_parser.n_neighbors)
        knn.fit(features_train, targets_train)

        preds = knn.predict(features_valid)
        # The forecast window is passed as a single sample
        forecast_data = knn.predict(forecast_data_input.reshape(1, -1))

        forecast_data_df = pd.DataFrame(forecast_data.T, index=future_dates)
        last_close = df_stock["5. adjusted close"].values[-1]
        print_pretty_prediction(forecast_data_df[0], last_close)

        plot_title = f"KNN Model with {ns_parser.n_neighbors} Neighbors on {s_ticker}"
        plot_data_predictions(
            df_stock,
            preds,
            y_valid,
            y_dates_valid,
            scaler,
            plot_title,
            forecast_data_df,
            1,
        )

    except Exception as e:
        print(e)
        print("")
def regression(l_args, s_ticker, df_stock, polynomial):
    """
    Predict prices with a linear or polynomial regression

    Windows of ``n_inputs`` past adjusted closes are regressed onto the
    ``n_days`` prices that followed; the most recent window is then used to
    forecast. When an end date is given (``-e``) the history is cut at that
    date and the forecast is compared with the prices that actually followed
    (backtesting).

    Parameters
    ----------
    l_args: list of str
        Argparse arguments
    s_ticker: str
        Ticker (used in the plot title)
    df_stock: pd.DataFrame
        Dataframe of prices. The "5. adjusted close" column is used and the
        index is compared against the parsed end date.
    polynomial
        Degree selector. ``LINEAR`` fits a plain ``LinearRegression``;
        ``USER_INPUT`` adds a required ``-p`` option whose value is used as
        the degree; any other value is passed to ``PolynomialFeatures`` as the
        degree of a Ridge pipeline.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="regression",
        description="""
            Regression attempts to model the relationship between
            two variables by fitting a linear/quadratic/cubic/other equation to
            observed data. One variable is considered to be an explanatory variable,
            and the other is considered to be a dependent variable.
        """,
    )

    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    if polynomial == USER_INPUT:
        parser.add_argument(
            "-p",
            "--polynomial",
            action="store",
            dest="n_polynomial",
            type=check_positive,
            required=True,
            help="polynomial associated with regression.",
        )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        # BACKTESTING
        if ns_parser.s_end_date:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            # Require at least one full input + output window before the end date
            earliest_end = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=ns_parser.n_inputs + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date, n_next_days=ns_parser.n_days
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Keep the real prices of the forecast window, then cut the history
            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        close = df_stock["5. adjusted close"]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            close.values,
            ns_parser.n_inputs,
            ns_parser.n_days,
            ns_parser.n_jumps,
        )

        if not stock_x:
            print("Given the model parameters more training data is needed.\n")
            return

        # Machine Learning model
        if polynomial == LINEAR:
            model = linear_model.LinearRegression(n_jobs=-1)
        else:
            if polynomial == USER_INPUT:
                # From here on `polynomial` holds the degree (also shown in the title)
                polynomial = ns_parser.n_polynomial
            model = pipeline.make_pipeline(
                preprocessing.PolynomialFeatures(polynomial), linear_model.Ridge()
            )

        model.fit(stock_x, stock_y)
        # Feed the latest n_inputs prices as a single sample
        latest_window = close.values[-ns_parser.n_inputs:].reshape(1, -1)
        l_predictions = model.predict(latest_window)[0]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=close.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, close, lw=2)
        # BACKTESTING
        if ns_parser.s_end_date:
            plt.title(
                f"BACKTESTING: Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        plt.xlim(
            df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1]
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # Visibility is passed positionally: the keyword was renamed from `b`
        # to `visible` in Matplotlib 3.5, so neither spelling is portable.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last known price and the first prediction
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [close.values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(
            df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2
        )
        _, _, ymin, ymax = plt.axis()
        plt.vlines(
            df_stock.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k"
        )

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    close.values[-1],
                    df_future["5. adjusted close"].values[0],
                ],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if ns_parser.s_end_date:
            real = df_future["5. adjusted close"]
            window_end = df_pred.index[-1] + datetime.timedelta(days=1)

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            plt.plot(df_future.index, real, lw=2, c="tab:blue", ls="--")
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, real, c="tab:blue", lw=3)
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [close.values[-1], real.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [df_stock.index[-1], df_pred.index[0]],
                [close.values[-1], df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(df_stock.index[-1], window_end)
            plt.xticks([df_stock.index[-1], window_end], visible=True)
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            # Signed prediction error relative to the real price, in percent
            pct_error = 100 * (df_pred.values - real.values) / real.values
            plt.plot(df_future.index, pct_error, lw=2, c="red")
            plt.scatter(df_future.index, pct_error, c="red", lw=5)
            plt.title("BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [0, pct_error[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(df_stock.index[-1], window_end)
            plt.xticks([df_stock.index[-1], window_end], visible=True)
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = real

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time Real [$] x Prediction [$]")
                print(
                    df_pred.apply(
                        price_prediction_backtesting_color, axis=1
                    ).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, close.values[-1])
        print("")

    except SystemExit:
        print("")
    except Exception as e:
        print(e)
        print("")
def exponential_smoothing(l_args, s_ticker, df_stock):
    """
    Predict prices with an exponential smoothing (ETS) model

    The model is obtained from ``get_exponential_smoothing_model`` for the
    requested trend/seasonal components. If the fit did not converge or the
    forecast contains NaN, only a warning line is printed. When an end date
    is given (``-e``) the history is cut at that date and the forecast is
    compared with the prices that actually followed (backtesting).

    Parameters
    ----------
    l_args: list of str
        Argparse arguments
    s_ticker: str
        Ticker (used in the plot title)
    df_stock: pd.DataFrame
        Dataframe of prices. The "5. adjusted close" column is used and the
        index is compared against the parsed end date.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="ets",
        description="""
            Exponential Smoothing, see https://otexts.com/fpp2/taxonomy.html

            Trend='N', Seasonal='N': Simple Exponential Smoothing
            Trend='N', Seasonal='A': Exponential Smoothing
            Trend='N', Seasonal='M': Exponential Smoothing
            Trend='A', Seasonal='N': Holt’s linear method
            Trend='A', Seasonal='A': Additive Holt-Winters’ method
            Trend='A', Seasonal='M': Multiplicative Holt-Winters’ method
            Trend='Ad', Seasonal='N': Additive damped trend method
            Trend='Ad', Seasonal='A': Exponential Smoothing
            Trend='Ad', Seasonal='M': Holt-Winters’ damped method

            Trend component: N: None, A: Additive, Ad: Additive Damped
            Seasonality component: N: None, A: Additive, M: Multiplicative
        """,
    )

    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-t",
        "--trend",
        action="store",
        dest="trend",
        type=check_valid_trend,
        default="N",
        help="Trend component: N: None, A: Additive, Ad: Additive Damped.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store",
        dest="seasonal",
        type=check_valid_seasonal,
        default="N",
        help="Seasonality component: N: None, A: Additive, M: Multiplicative.",
    )
    parser.add_argument(
        "-p",
        "--periods",
        action="store",
        dest="seasonal_periods",
        type=check_positive,
        default=5,
        help="Seasonal periods.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        # BACKTESTING
        if ns_parser.s_end_date:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            # Require at least 5 + n_days market days of history before the end date
            earliest_end = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=5 + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date, n_next_days=ns_parser.n_days
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Keep the real prices of the forecast window, then cut the history
            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        close = df_stock["5. adjusted close"]

        # Get ETS model
        model, title = get_exponential_smoothing_model(
            close.values,
            ns_parser.trend,
            ns_parser.seasonal,
            ns_parser.seasonal_periods,
        )

        if not model.mle_retvals.success:
            print("ConvergenceWarning: Optimization failed to converge.")
            return

        forecast_values = model.forecast(ns_parser.n_days)

        l_pred_days = get_next_stock_market_days(
            last_stock_day=close.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(forecast_values, index=l_pred_days, name="Price")

        # Logical `not` rather than bitwise `~`: `~` is only a negation for
        # numpy booleans and would be truthy for a plain Python bool.
        if np.isnan(forecast_values).any():
            print("RuntimeWarning: invalid value encountered in double_scalars.")
            return

        print(f"\n{title}")
        print("\nFit model parameters:")
        for key, value in model.params.items():
            print(f"{key} {' '*(18-len(key))}: {value}")

        print("\nAssess fit model:")
        print(f"AIC: {round(model.aic, 2)}")
        print(f"BIC: {round(model.bic, 2)}")
        print(f"SSE: {round(model.sse, 2)}\n")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, close, lw=2)
        # BACKTESTING
        if ns_parser.s_end_date:
            plt.title(f"BACKTESTING: {title} on {s_ticker}")
        else:
            plt.title(f"{title} on {s_ticker}")

        plt.xlim(
            df_stock.index[0],
            get_next_stock_market_days(df_pred.index[-1], 1)[-1],
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # Visibility is passed positionally: the keyword was renamed from `b`
        # to `visible` in Matplotlib 3.5, so neither spelling is portable.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last known price and the first prediction
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [close.values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(
            df_stock.index[-1],
            df_pred.index[-1],
            facecolor="tab:orange",
            alpha=0.2,
        )
        _, _, ymin, ymax = plt.axis()
        plt.vlines(
            df_stock.index[-1],
            ymin,
            ymax,
            linewidth=1,
            linestyle="--",
            color="k",
        )

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    close.values[-1],
                    df_future["5. adjusted close"].values[0],
                ],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if ns_parser.s_end_date:
            real = df_future["5. adjusted close"]
            window_end = df_pred.index[-1] + datetime.timedelta(days=1)

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            plt.plot(df_future.index, real, lw=2, c="tab:blue", ls="--")
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, real, c="tab:blue", lw=3)
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [close.values[-1], real.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [df_stock.index[-1], df_pred.index[0]],
                [close.values[-1], df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(df_stock.index[-1], window_end)
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            # Signed prediction error relative to the real price, in percent
            pct_error = 100 * (df_pred.values - real.values) / real.values
            plt.plot(df_future.index, pct_error, lw=2, c="red")
            plt.scatter(df_future.index, pct_error, c="red", lw=5)
            plt.title("BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [0, pct_error[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(df_stock.index[-1], window_end)
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = real

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time Real [$] x Prediction [$]")
                print(
                    df_pred.apply(
                        price_prediction_backtesting_color, axis=1
                    ).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, close.values[-1])
        print("")

    except Exception as e:
        print(e)
        print("")
def _sma_forecast(history, n_length, n_days):
    """Roll a simple moving average forward to forecast future values.

    Each forecast is the mean of the trailing ``n_length`` values of the
    series, where the series is the history extended by every forecast
    made so far.

    Parameters
    ----------
    history : sequence of float
        Observed values, oldest first.
    n_length : int
        Size of the moving-average window (expected to be positive).
    n_days : int
        Number of forecast steps to produce.

    Returns
    -------
    list
        ``n_days`` forecast values, in chronological order.
    """
    # Only the trailing n_length observations can ever enter a window.
    series = list(history[-n_length:])
    forecasts = []
    for _ in range(n_days):
        # Always average exactly the trailing window; once the forecasts
        # outnumber n_length the oldest ones must drop out, otherwise the
        # result degrades into a cumulative mean instead of an SMA.
        next_value = np.mean(series[-n_length:])
        forecasts.append(next_value)
        series.append(next_value)
    return forecasts


def simple_moving_average(l_args, s_ticker, df_stock):
    """Forecast prices with a simple moving average, plot and print them.

    Parses the ``sma`` command arguments, forecasts ``n_days`` values of the
    "5. adjusted close" column by rolling a ``n_length`` moving average
    forward, and plots the history together with the forecast. When an end
    date is supplied (backtesting), the data after that date is held out,
    plotted against the forecast in a second figure, and the comparison and
    its KPIs are printed. Otherwise the forecast itself is printed.

    Any exception raised after argument parsing starts is caught and printed.

    Parameters
    ----------
    l_args : List[str]
        Command line arguments for the ``sma`` command
        (``-l/--length``, ``-d/--days``, ``-e/--end``).
    s_ticker : str
        Stock ticker, used in the plot title.
    df_stock : pd.DataFrame
        Stock prices; must contain a "5. adjusted close" column and an
        index that can be compared with and sliced by dates.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="sma",
        description="""
            Moving Averages are used to smooth the data in an array to
            help eliminate noise and identify trends. The Simple Moving Average is literally
            the simplest form of a moving average. Each output value is the average of the
            previous n values. In a Simple Moving Average, each value in the time period carries
            equal weight, and values outside of the time period are not included in the average.
            This makes it less responsive to recent changes in the data, which can be useful for
            filtering out those changes.
        """,
    )
    parser.add_argument(
        "-l",
        "--length",
        action="store",
        dest="n_length",
        type=check_positive,
        default=20,
        help="length of SMA window.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        backtesting = bool(ns_parser.s_end_date)
        df_future = None

        # BACKTESTING: validate the end date and split off the held-out data
        if backtesting:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            earliest_end = get_next_stock_market_days(
                last_stock_day=df_stock.index[0], n_next_days=5 + ns_parser.n_days
            )[-1]
            if ns_parser.s_end_date < earliest_end:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date, n_next_days=ns_parser.n_days
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Held-out "real" data first, then truncate the training data.
            df_future = df_stock[future_index[0] : future_index[-1]]
            df_stock = df_stock[: ns_parser.s_end_date]

        close = df_stock["5. adjusted close"]
        last_day = df_stock.index[-1]
        last_close = close.values[-1]

        # Prediction data
        l_predictions = _sma_forecast(
            close.values, ns_parser.n_length, ns_parser.n_days
        )
        l_pred_days = get_next_stock_market_days(
            last_stock_day=close.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, close, lw=2)
        if backtesting:
            plt.title(
                f"BACKTESTING: {ns_parser.n_length} Moving Average on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"{ns_parser.n_length} Moving Average on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        plt.xlim(
            df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1]
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally: its keyword was renamed
        # from `b` to `visible` in Matplotlib 3.5, so neither keyword works
        # across all versions.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        df_ma = close.rolling(window=ns_parser.n_length).mean()
        plt.plot(df_ma.index, df_ma, lw=2, linestyle="--", c="tab:orange")
        # Connector between the last observed close and the first forecast.
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_close, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(last_day, df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(last_day, ymin, ymax, linewidth=1, linestyle="--", color="k")

        # BACKTESTING: overlay the held-out real prices
        if backtesting:
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, df_future["5. adjusted close"].values[0]],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING: second figure comparing real data and prediction
        if backtesting:
            real = df_future["5. adjusted close"]
            x_window = (last_day, df_pred.index[-1] + datetime.timedelta(days=1))
            # Relative prediction error in percent, one value per forecast day.
            pct_error = 100 * (df_pred.values - real.values) / real.values

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)

            # Upper panel: real prices versus predicted prices
            plt.subplot(211)
            plt.plot(df_future.index, real, lw=2, c="tab:blue", ls="--")
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, real, c="tab:blue", lw=3)
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, real.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_day, df_pred.index[0]],
                [last_close, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(*x_window)
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            # No x ticks on the upper panel; the lower panel carries them.
            plt.xticks([])

            # Lower panel: prediction error in percent
            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(df_future.index, pct_error, lw=2, c="red")
            plt.scatter(df_future.index, pct_error, c="red", lw=5)
            plt.title("BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [last_day, df_future.index[0]],
                [0, 100 * (df_pred.values[0] - real.values[0]) / real.values[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(*x_window)
            plt.xticks(list(x_window), visible=True)
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            # NOTE(review): on this panel the first two artists are the zero
            # line and the error curve, so these labels look copied from the
            # price panel -- confirm the intended legend text.
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = real

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time Real [$] x Prediction [$]")
                print(
                    df_pred.apply(
                        price_prediction_backtesting_color, axis=1
                    ).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, last_close)
        print("")

    except Exception as e:
        # Command boundary: report the problem instead of crashing the caller.
        print(e)
        print("")