def display_k_nearest_neighbors(
    ticker: str,
    data: Union[pd.DataFrame, pd.Series],
    n_neighbors: int,
    n_input_days: int,
    n_predict_days: int,
    test_size: float,
    end_date: str = "",
    no_shuffle: bool = True,
):
    """Run the KNN model helper, then print and plot its forecast

    Parameters
    ----------
    ticker : str
        Ticker name; only used to build the plot title
    data : Union[pd.DataFrame, pd.Series]
        Data handed to the KNN model helper. Its last value is passed to
        the prediction printer alongside the forecast
    n_neighbors : int
        Number of neighbors for knn
    n_input_days : int
        Length of input sequences
    n_predict_days : int
        Number of days to predict
    test_size : float
        Fraction of data for testing
    end_date : str, optional
        End date for backtesting, by default ""
    no_shuffle : bool, optional
        Shuffle flag forwarded unchanged to the model helper, by default True
    """
    model_output = knn_model.get_knn_model_data(
        data, n_input_days, n_predict_days, n_neighbors, test_size, end_date, no_shuffle
    )
    forecast_df, predictions, valid_targets, valid_dates, fitted_scaler = model_output

    # An empty forecast frame is treated as a failed prep/prediction step.
    if forecast_df.empty:
        print("Issue performing data prep and prediction")
        return

    forecast_column = forecast_df[0]
    latest_value = data.values[-1]
    print_pretty_prediction(forecast_column, latest_value)

    plot_title = f"KNN Model with {n_neighbors} Neighbors on {ticker}"
    # The trailing 1 is the loop count: this model is fitted a single time.
    plot_data_predictions(
        data,
        predictions,
        valid_targets,
        valid_dates,
        fitted_scaler,
        plot_title,
        forecast_df,
        1,
    )
    print("")
def display_rnn(
    dataset: str,
    data: Union[pd.Series, pd.DataFrame],
    n_input_days: int,
    n_predict_days: int,
    learning_rate: float,
    epochs: int,
    batch_size: int,
    test_size: float,
    n_loops: int,
    no_shuffle: bool,
    time_res: str = "",
):
    """Train the RNN model, then print and plot its forecast

    Parameters
    ----------
    dataset : str
        Dataset name; only used to build the plot title
    data : Union[pd.Series, pd.DataFrame]
        Data to feed to model
    n_input_days : int
        Number of inputs to train
    n_predict_days : int
        Number of outputs to predict
    learning_rate : float
        Learning rate forwarded to the model
    epochs : int
        Number of training epochs
    batch_size : int
        Training batch size
    test_size : float
        Size of test set
    n_loops : int
        Number of loops to perform for model. With more than one loop a
        "Median" column is added to the forecast and used for printing
    no_shuffle : bool
        Flag to not randomly shuffle data
    time_res : str
        Frequency string for ``pd.date_range``. When non-empty, the forecast
        index is rebuilt from the last index of ``data``, allowing for
        predicting outside of standard market days
    """
    model_output = neural_networks_model.rnn_model(
        data,
        n_input_days,
        n_predict_days,
        learning_rate,
        epochs,
        batch_size,
        test_size,
        n_loops,
        no_shuffle,
    )
    forecast_df, loop_preds, valid_targets, valid_dates, fitted_scaler = model_output

    if time_res:
        # Build n_predict_days + 1 stamps starting from the last index of
        # `data` and discard the first one.
        stamps = pd.date_range(
            data.index[-1], periods=n_predict_days + 1, freq=time_res
        )
        forecast_df.index = stamps[1:]

    # Several loops are summarised by their row-wise median; a single loop
    # is reported through column 0.
    headline_column = 0
    if n_loops > 1:
        forecast_df["Median"] = forecast_df.median(axis=1)
        headline_column = "Median"
    print_pretty_prediction(forecast_df[headline_column], data.values[-1])

    median_preds = np.median(loop_preds, axis=0)
    plot_title = f"RNN Model on {dataset}"
    plot_data_predictions(
        data,
        median_preds,
        valid_targets,
        valid_dates,
        fitted_scaler,
        plot_title,
        forecast_df,
        n_loops,
        time_res,
    )
    console.print("")
def display_k_nearest_neighbors(
    ticker: str,
    data: Union[pd.DataFrame, pd.Series],
    n_neighbors: int,
    n_input_days: int,
    n_predict_days: int,
    test_size: float,
    end_date: str = "",
    no_shuffle: bool = True,
    time_res: str = "",
    external_axes: Optional[List[plt.Axes]] = None,
):
    """Run the KNN model helper, then print and plot its forecast

    Parameters
    ----------
    ticker : str
        Ticker name; only used to build the plot title
    data : Union[pd.DataFrame, pd.Series]
        Data handed to the KNN model helper. Its last value is passed to
        the prediction printer alongside the forecast
    n_neighbors : int
        Number of neighbors for knn
    n_input_days : int
        Length of input sequences
    n_predict_days : int
        Number of days to predict
    test_size : float
        Fraction of data for testing
    end_date : str, optional
        End date for backtesting, by default ""
    no_shuffle : bool, optional
        Shuffle flag forwarded unchanged to the model helper, by default True
    time_res : str
        Frequency string for ``pd.date_range``. When non-empty, the forecast
        index is rebuilt from the last index of ``data``, allowing for
        predicting outside of standard market days
    external_axes : Optional[List[plt.Axes]], optional
        External axes (1 axis is expected in the list), forwarded to the
        plotting helper, by default None
    """
    model_output = knn_model.get_knn_model_data(
        data, n_input_days, n_predict_days, n_neighbors, test_size, end_date, no_shuffle
    )
    forecast_df, predictions, valid_targets, valid_dates, fitted_scaler = model_output

    # An empty forecast frame is treated as a failed prep/prediction step.
    if forecast_df.empty:
        console.print("Issue performing data prep and prediction")
        return

    if time_res:
        # Build n_predict_days + 1 stamps starting from the last index of
        # `data` and discard the first one.
        stamps = pd.date_range(
            data.index[-1], periods=n_predict_days + 1, freq=time_res
        )
        forecast_df.index = stamps[1:]

    forecast_column = forecast_df[0]
    latest_value = data.values[-1]
    print_pretty_prediction(forecast_column, latest_value)

    plot_title = f"KNN Model with {n_neighbors} Neighbors on {ticker}"
    plot_data_predictions(
        data=data,
        preds=predictions,
        y_valid=valid_targets,
        y_dates_valid=valid_dates,
        scaler=fitted_scaler,
        title=plot_title,
        forecast_data=forecast_df,
        n_loops=1,  # this model is fitted a single time
        time_str=time_res,
        external_axes=external_axes,
    )
    console.print("")
def conv1d(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """
    Train a 1D Convolutional Neural Net (1D CNN)

    The model is built, compiled and fitted ``n_loops`` times on the
    "Adj Close" column. Each loop contributes one set of validation
    predictions and one forecast; the results are printed and plotted.
    Any exception is printed (with traceback) rather than propagated, and
    ``restore_env()`` is always called on exit.

    Parameters
    ----------
    other_args:List[str]
        Argparse arguments
    s_ticker: str
        Stock ticker; only used to build the plot title
    df_stock: pd.DataFrame
        Dataframe of stock prices; must contain an "Adj Close" column
    """
    try:
        ns_parser = parse_args(
            prog="conv1d",
            description="""1D CNN.""",
            other_args=other_args,
        )
        if not ns_parser:
            return

        (
            X_train,
            X_valid,
            y_train,
            y_valid,
            _,
            _,
            _,
            y_dates_valid,
            forecast_data_input,
            dates_forecast_input,
            scaler,
            is_error,
        ) = prepare_scale_train_valid_test(df_stock["Adj Close"], ns_parser)
        if is_error:
            return

        print(
            f"Training on {X_train.shape[0]} sequences of length {X_train.shape[1]}. "
            f"Using {X_valid.shape[0]} sequences of length {X_valid.shape[1]} for validation. "
            f"Model will run {ns_parser.n_loops} loops"
        )

        future_dates = get_next_stock_market_days(
            dates_forecast_input[-1], n_next_days=ns_parser.n_days
        )

        # The model is fed a trailing axis of size 1. These reshapes do not
        # depend on the loop, so they are computed once up front.
        X_train_3d = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
        X_valid_3d = X_valid.reshape(X_valid.shape[0], X_valid.shape[1], 1)

        # One slice per loop: validation predictions and forecast values.
        preds = np.zeros((ns_parser.n_loops, X_valid.shape[0], ns_parser.n_days))
        forecast_data = np.zeros((ns_parser.n_loops, ns_parser.n_days))

        for i in range(ns_parser.n_loops):
            # Build a fresh Neural Network model for every loop
            model = build_neural_network_model(
                cfg_nn_models.Convolutional,
                ns_parser.n_inputs,
                ns_parser.n_days,
            )
            model.compile(
                optimizer=optimizers[cfg_nn_models.Optimizer](
                    learning_rate=ns_parser.lr
                ),
                loss=cfg_nn_models.Loss,
            )

            # `es` is a callback defined outside this function
            # (presumably early stopping - TODO confirm).
            model.fit(
                X_train_3d,
                y_train,
                epochs=ns_parser.n_epochs,
                verbose=True,
                batch_size=ns_parser.n_batch_size,
                validation_data=(X_valid_3d, y_valid),
                callbacks=[es],
            )

            preds[i] = model.predict(X_valid_3d).reshape(
                X_valid.shape[0], ns_parser.n_days
            )
            forecast_data[i] = forecast(
                forecast_data_input, future_dates, model, scaler
            ).values.flat

        # Rows are future dates, columns are loops.
        forecast_data_df = pd.DataFrame(forecast_data.T, index=future_dates)

        last_price = df_stock["Adj Close"].values[-1]
        if ns_parser.n_loops > 1:
            # Summarise several loops by their row-wise median.
            forecast_data_df["Median"] = forecast_data_df.median(axis=1)
            print_pretty_prediction(forecast_data_df["Median"], last_price)
        else:
            print_pretty_prediction(forecast_data_df[0], last_price)

        plot_data_predictions(
            df_stock,
            np.median(preds, axis=0),
            y_valid,
            y_dates_valid,
            scaler,
            f"Conv1D Model on {s_ticker}",
            forecast_data_df,
            ns_parser.n_loops,
        )
        print("")

    except Exception as e:
        # Command boundary: report the failure instead of propagating it.
        print(e)
        traceback.print_exc()
        print("")

    finally:
        restore_env()
def k_nearest_neighbors(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """
    Train KNN model

    Parses the command arguments, fits a ``KNeighborsRegressor`` on the
    "Adj Close" column, then prints and plots the validation predictions and
    the forecast. Any exception is printed rather than propagated.

    Parameters
    ----------
    other_args: List[str]
        List of argparse arguments
    s_ticker: str
        Ticker; only used to build the plot title
    df_stock: pd.DataFrame
        Dataframe of stock prices; must contain an "Adj Close" column
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="knn",
        description="""
            K nearest neighbors is a simple algorithm that stores all
            available cases and predict the numerical target based on a similarity measure
            (e.g. distance functions).
        """,
    )
    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use as input for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-n",
        "--neighbors",
        action="store",
        dest="n_neighbors",
        type=check_positive,
        default=20,
        help="number of neighbors to use on the algorithm.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select for testing",
    )
    parser.add_argument(
        "-t",
        "--test_size",
        default=0.2,
        dest="valid_split",
        type=float,
        help="Percentage of data to validate in sample",
    )
    parser.add_argument(
        "-p",
        "--pp",
        action="store",
        dest="s_preprocessing",
        default="none",
        choices=["normalization", "standardization", "minmax", "none"],
        help="pre-processing data.",
    )
    # NOTE(review): with store_false, passing --no_shuffle sets
    # ns_parser.no_shuffle to False (default True) - confirm this is the
    # polarity the data-prep helper expects.
    parser.add_argument(
        "--no_shuffle",
        action="store_false",
        dest="no_shuffle",
        default=True,
        help="Specify if shuffling validation inputs.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        (
            X_train,
            X_valid,
            y_train,
            y_valid,
            _,
            _,
            _,
            y_dates_valid,
            forecast_data_input,
            dates_forecast_input,
            scaler,
            is_error,
        ) = prepare_scale_train_valid_test(df_stock["Adj Close"], ns_parser)
        if is_error:
            print("Error preparing data")
            return

        print(
            f"Training on {X_train.shape[0]} sequences of length {X_train.shape[1]}. "
            f"Using {X_valid.shape[0]} sequences of length {X_valid.shape[1]} for validation"
        )

        future_dates = get_next_stock_market_days(
            dates_forecast_input[-1], n_next_days=ns_parser.n_days
        )

        # Machine Learning model; inputs are flattened to 2-D before fitting.
        knn = neighbors.KNeighborsRegressor(n_neighbors=ns_parser.n_neighbors)
        knn.fit(
            X_train.reshape(X_train.shape[0], X_train.shape[1]),
            y_train.reshape(y_train.shape[0], y_train.shape[1]),
        )

        preds = knn.predict(X_valid.reshape(X_valid.shape[0], X_valid.shape[1]))
        forecast_data = knn.predict(forecast_data_input.reshape(1, -1))

        # Replace non-positive (and NaN) predictions with zero. The values are
        # flattened to plain floats first: mixing 1-element arrays with a
        # scalar 0 in one list is not a valid DataFrame input.
        clamped_forecast = [
            float(value) if value > 0 else 0.0 for value in forecast_data.ravel()
        ]
        forecast_data_df = pd.DataFrame(clamped_forecast, index=future_dates)

        print_pretty_prediction(forecast_data_df[0], df_stock["Adj Close"].values[-1])
        plot_data_predictions(
            df_stock,
            preds,
            y_valid,
            y_dates_valid,
            scaler,
            f"KNN Model with {ns_parser.n_neighbors} Neighbors on {s_ticker}",
            forecast_data_df,
            1,  # single fit, so one "loop"
        )
        print("")

    except Exception as e:
        # Command boundary: report the failure instead of propagating it.
        print(e)
        print("")
def display_rnn(
    dataset: str,
    data: Union[pd.Series, pd.DataFrame],
    n_input_days: int,
    n_predict_days: int,
    learning_rate: float,
    epochs: int,
    batch_size: int,
    test_size: float,
    n_loops: int,
    no_shuffle: bool,
):
    """Train the RNN model, then print and plot its forecast

    Parameters
    ----------
    dataset : str
        Dataset name; only used to build the plot title
    data : Union[pd.Series, pd.DataFrame]
        Data to feed to model
    n_input_days : int
        Number of inputs to train
    n_predict_days : int
        Number of outputs to predict
    learning_rate : float
        Learning rate forwarded to the model
    epochs : int
        Number of training epochs
    batch_size : int
        Training batch size
    test_size : float
        Size of test set
    n_loops : int
        Number of loops to perform for model. With more than one loop a
        "Median" column is added to the forecast and used for printing
    no_shuffle : bool
        Flag to not randomly shuffle data
    """
    model_output = neural_networks_model.rnn_model(
        data,
        n_input_days,
        n_predict_days,
        learning_rate,
        epochs,
        batch_size,
        test_size,
        n_loops,
        no_shuffle,
    )
    forecast_df, loop_preds, valid_targets, valid_dates, fitted_scaler = model_output

    # Several loops are summarised by their row-wise median; a single loop
    # is reported through column 0.
    headline_column = 0
    if n_loops > 1:
        forecast_df["Median"] = forecast_df.median(axis=1)
        headline_column = "Median"
    print_pretty_prediction(forecast_df[headline_column], data.values[-1])

    median_preds = np.median(loop_preds, axis=0)
    plot_title = f"RNN Model on {dataset}"
    plot_data_predictions(
        data,
        median_preds,
        valid_targets,
        valid_dates,
        fitted_scaler,
        plot_title,
        forecast_df,
        n_loops,
    )
    print("")