Пример #1
0
def display_k_nearest_neighbors(
    ticker: str,
    data: Union[pd.DataFrame, pd.Series],
    n_neighbors: int,
    n_input_days: int,
    n_predict_days: int,
    test_size: float,
    end_date: str = "",
    no_shuffle: bool = True,
):
    """Run the KNN model on ``data``, then print and plot its forecast.

    Parameters
    ----------
    ticker : str
        Name used in the plot title
    data : Union[pd.DataFrame, pd.Series]
        Data to use for ML; its last value is passed to the printed summary
    n_neighbors : int
        Number of neighbors for knn
    n_input_days : int
        Length of input sequences
    n_predict_days : int
        Number of days to predict
    test_size : float
        Fraction of data for testing
    end_date : str, optional
        End date for backtesting, by default ""
    no_shuffle : bool, optional
        Forwarded unchanged to ``knn_model.get_knn_model_data``, by default True
    """
    model_output = knn_model.get_knn_model_data(
        data, n_input_days, n_predict_days, n_neighbors, test_size, end_date, no_shuffle
    )
    forecast_data_df, preds, y_valid, y_dates_valid, scaler = model_output

    # An empty forecast frame is how the model layer signals a failure.
    if forecast_data_df.empty:
        print("Issue performing data prep and prediction")
        return

    forecast_column = forecast_data_df[0]
    last_observed = data.values[-1]
    print_pretty_prediction(forecast_column, last_observed)

    plot_title = f"KNN Model with {n_neighbors} Neighbors on {ticker}"
    # The trailing 1 sits in the slot the RNN/CNN callers fill with n_loops.
    plot_data_predictions(
        data, preds, y_valid, y_dates_valid, scaler, plot_title, forecast_data_df, 1
    )
    print("")
Пример #2
0
def display_rnn(
    dataset: str,
    data: Union[pd.Series, pd.DataFrame],
    n_input_days: int,
    n_predict_days: int,
    learning_rate: float,
    epochs: int,
    batch_size: int,
    test_size: float,
    n_loops: int,
    no_shuffle: bool,
    time_res: str = "",
):
    """Train the RNN model, then print and plot its forecast.

    Parameters
    ----------
    dataset : str
        Dataset name, used in the plot title
    data : Union[pd.Series, pd.DataFrame]
        Data to feed to model
    n_input_days : int
        Number of inputs to train
    n_predict_days : int
        Number of outputs to predict
    learning_rate : float
        Learning rate forwarded to the model
    epochs : int
        Number of training epochs
    batch_size : int
        Training batch size
    test_size : float
        Size of test set
    n_loops : int
        Number of loops to perform for model; when greater than 1 a
        "Median" column is added to the forecast and used for the summary
    no_shuffle : bool
        Forwarded unchanged to ``neural_networks_model.rnn_model``
    time_res : str
        Resolution for data, allowing for predicting outside of standard
        market days. When non-empty it is used as the ``freq`` for rebuilding
        the forecast index.
    """
    model_output = neural_networks_model.rnn_model(
        data,
        n_input_days,
        n_predict_days,
        learning_rate,
        epochs,
        batch_size,
        test_size,
        n_loops,
        no_shuffle,
    )
    forecast_data_df, preds, y_valid, y_dates_valid, scaler = model_output

    if time_res:
        # Generate one extra period anchored at the last known timestamp and
        # drop that anchor, so the forecast index starts one step after it.
        anchored_range = pd.date_range(
            data.index[-1], periods=n_predict_days + 1, freq=time_res
        )
        forecast_data_df.index = anchored_range[1:]

    # Several loops are summarised by their row-wise median; a single loop
    # is reported directly from column 0.
    if n_loops > 1:
        forecast_data_df["Median"] = forecast_data_df.median(axis=1)
        summary_column = "Median"
    else:
        summary_column = 0
    print_pretty_prediction(forecast_data_df[summary_column], data.values[-1])

    plot_data_predictions(
        data,
        np.median(preds, axis=0),
        y_valid,
        y_dates_valid,
        scaler,
        f"RNN Model on {dataset}",
        forecast_data_df,
        n_loops,
        time_res,
    )
    console.print("")
Пример #3
0
def display_k_nearest_neighbors(
    ticker: str,
    data: Union[pd.DataFrame, pd.Series],
    n_neighbors: int,
    n_input_days: int,
    n_predict_days: int,
    test_size: float,
    end_date: str = "",
    no_shuffle: bool = True,
    time_res: str = "",
    external_axes: Optional[List[plt.Axes]] = None,
):
    """Run the KNN model on ``data``, then print and plot its forecast.

    Parameters
    ----------
    ticker : str
        Name used in the plot title
    data : Union[pd.DataFrame, pd.Series]
        Data to use for ML; its last value is passed to the printed summary
    n_neighbors : int
        Number of neighbors for knn
    n_input_days : int
        Length of input sequences
    n_predict_days : int
        Number of days to predict
    test_size : float
        Fraction of data for testing
    end_date : str, optional
        End date for backtesting, by default ""
    no_shuffle : bool, optional
        Forwarded unchanged to ``knn_model.get_knn_model_data``, by default True
    time_res : str
        Resolution for data, allowing for predicting outside of standard
        market days. When non-empty it is used as the ``freq`` for rebuilding
        the forecast index.
    external_axes : Optional[List[plt.Axes]], optional
        External axes (1 axis is expected in the list), by default None
    """
    model_output = knn_model.get_knn_model_data(
        data, n_input_days, n_predict_days, n_neighbors, test_size, end_date, no_shuffle
    )
    forecast_data_df, preds, y_valid, y_dates_valid, scaler = model_output

    # An empty forecast frame is how the model layer signals a failure.
    if forecast_data_df.empty:
        console.print("Issue performing data prep and prediction")
        return

    if time_res:
        # Generate one extra period anchored at the last known timestamp and
        # drop that anchor, so the forecast index starts one step after it.
        anchored_range = pd.date_range(
            data.index[-1], periods=n_predict_days + 1, freq=time_res
        )
        forecast_data_df.index = anchored_range[1:]

    forecast_column = forecast_data_df[0]
    last_observed = data.values[-1]
    print_pretty_prediction(forecast_column, last_observed)

    plot_title = f"KNN Model with {n_neighbors} Neighbors on {ticker}"
    plot_data_predictions(
        data=data,
        preds=preds,
        y_valid=y_valid,
        y_dates_valid=y_dates_valid,
        scaler=scaler,
        title=plot_title,
        forecast_data=forecast_data_df,
        n_loops=1,  # KNN is fitted once, so there is a single forecast column
        time_str=time_res,
        external_axes=external_axes,
    )
    console.print("")
Пример #4
0
def conv1d(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """
    Train a 1D Convolutional Neural Net (1D CNN) and display its forecast

    The model is rebuilt and refitted ``ns_parser.n_loops`` times. When more
    than one loop is run, a row-wise "Median" column is added to the forecast
    and used for the printed summary. Any exception is printed together with
    its traceback instead of being propagated, and ``restore_env()`` is always
    called on exit.

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        Stock ticker, used in the plot title
    df_stock: pd.DataFrame
        Dataframe of stock prices; the "Adj Close" column is modelled
    """
    try:
        ns_parser = parse_args(
            prog="conv1d",
            description="""1D CNN.""",
            other_args=other_args,
        )
        if not ns_parser:
            return

        prepared = prepare_scale_train_valid_test(df_stock["Adj Close"], ns_parser)
        (
            X_train,
            X_valid,
            y_train,
            y_valid,
            _,
            _,
            _,
            y_dates_valid,
            forecast_data_input,
            dates_forecast_input,
            scaler,
            is_error,
        ) = prepared
        if is_error:
            return

        print(
            f"Training on {X_train.shape[0]} sequences of length {X_train.shape[1]}.  Using {X_valid.shape[0]} sequences "
            f" of length {X_valid.shape[1]} for validation. Model will run {ns_parser.n_loops} loops"
        )
        future_dates = get_next_stock_market_days(
            dates_forecast_input[-1], n_next_days=ns_parser.n_days
        )

        # One slice per loop: validation predictions and the future forecast.
        preds = np.zeros((ns_parser.n_loops, X_valid.shape[0], ns_parser.n_days))
        forecast_data = np.zeros((ns_parser.n_loops, ns_parser.n_days))

        for loop_idx in range(ns_parser.n_loops):
            # Build Neural Network model
            model = build_neural_network_model(
                cfg_nn_models.Convolutional,
                ns_parser.n_inputs,
                ns_parser.n_days,
            )
            optimizer = optimizers[cfg_nn_models.Optimizer](learning_rate=ns_parser.lr)
            model.compile(optimizer=optimizer, loss=cfg_nn_models.Loss)

            # The model is fed arrays with a trailing axis of size 1.
            train_inputs = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
            valid_inputs = X_valid.reshape(X_valid.shape[0], X_valid.shape[1], 1)

            model.fit(
                train_inputs,
                y_train,
                epochs=ns_parser.n_epochs,
                verbose=True,
                batch_size=ns_parser.n_batch_size,
                validation_data=(valid_inputs, y_valid),
                callbacks=[es],
            )

            valid_predictions = model.predict(valid_inputs)
            preds[loop_idx] = valid_predictions.reshape(
                X_valid.shape[0], ns_parser.n_days
            )
            loop_forecast = forecast(forecast_data_input, future_dates, model, scaler)
            forecast_data[loop_idx] = loop_forecast.values.flat

        # Transpose so each loop becomes a column indexed by future date.
        forecast_data_df = pd.DataFrame(forecast_data.T, index=future_dates)
        if ns_parser.n_loops > 1:
            forecast_data_df["Median"] = forecast_data_df.median(axis=1)
            summary_column = "Median"
        else:
            summary_column = 0
        print_pretty_prediction(
            forecast_data_df[summary_column], df_stock["Adj Close"].values[-1]
        )

        plot_data_predictions(
            df_stock,
            np.median(preds, axis=0),
            y_valid,
            y_dates_valid,
            scaler,
            f"Conv1D Model on {s_ticker}",
            forecast_data_df,
            ns_parser.n_loops,
        )
        print("")

    except Exception as e:
        # Command-level boundary: report the failure rather than crash the caller.
        print(e)
        traceback.print_exc()
        print("")

    finally:
        restore_env()
Пример #5
0
def k_nearest_neighbors(other_args: List[str], s_ticker: str,
                        df_stock: pd.DataFrame):
    """
    Train KNN model and display its forecast

    Parses the ``knn`` command options from ``other_args``, fits a
    ``KNeighborsRegressor`` on the prepared "Adj Close" series, then prints
    and plots the prediction. Exceptions raised after argument setup are
    printed instead of being propagated.

    Parameters
    ----------
    other_args: List[str]
        List of argparse arguments
    s_ticker: str
        Ticker, used in the plot title
    df_stock: pd.DataFrame
        Dataframe of stock prices; the "Adj Close" column is modelled

    Returns
    -------
    None
    """
    knn_parser = argparse.ArgumentParser(
        prog="knn",
        add_help=False,
        description="""
            K nearest neighbors is a simple algorithm that stores all
            available cases and predict the numerical target based on a similarity measure
            (e.g. distance functions).
        """,
    )

    # "store" is argparse's default action, so it is not spelled out below.
    knn_parser.add_argument(
        "-i",
        "--input",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use as input for prediction.",
    )
    knn_parser.add_argument(
        "-d",
        "--days",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    knn_parser.add_argument(
        "-j",
        "--jumps",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    knn_parser.add_argument(
        "-n",
        "--neighbors",
        dest="n_neighbors",
        type=check_positive,
        default=20,
        help="number of neighbors to use on the algorithm.",
    )
    knn_parser.add_argument(
        "-e",
        "--end",
        dest="s_end_date",
        type=valid_date,
        default=None,
        help="The end date (format YYYY-MM-DD) to select for testing",
    )
    knn_parser.add_argument(
        "-t",
        "--test_size",
        dest="valid_split",
        type=float,
        default=0.2,
        help="Percentage of data to validate in sample",
    )
    knn_parser.add_argument(
        "-p",
        "--pp",
        dest="s_preprocessing",
        default="none",
        choices=["normalization", "standardization", "minmax", "none"],
        help="pre-processing data.",
    )
    # NOTE: passing --no_shuffle stores False in ``no_shuffle``; default is True.
    knn_parser.add_argument(
        "--no_shuffle",
        dest="no_shuffle",
        action="store_false",
        default=True,
        help="Specify if shuffling validation inputs.",
    )

    try:
        ns_parser = parse_known_args_and_warn(knn_parser, other_args)
        if not ns_parser:
            return

        prepared = prepare_scale_train_valid_test(df_stock["Adj Close"], ns_parser)
        (
            X_train,
            X_valid,
            y_train,
            y_valid,
            _,
            _,
            _,
            y_dates_valid,
            forecast_data_input,
            dates_forecast_input,
            scaler,
            is_error,
        ) = prepared
        if is_error:
            print("Error preparing data")
            return

        train_rows, train_cols = X_train.shape[0], X_train.shape[1]
        valid_rows, valid_cols = X_valid.shape[0], X_valid.shape[1]
        print(
            f"Training on {train_rows} sequences of length {train_cols}.  Using {valid_rows} sequences "
            f" of length {valid_cols} for validation")
        future_dates = get_next_stock_market_days(
            dates_forecast_input[-1], n_next_days=ns_parser.n_days
        )

        # Machine Learning model; inputs are reshaped to 2-D before fitting.
        regressor = neighbors.KNeighborsRegressor(
            n_neighbors=ns_parser.n_neighbors
        )
        train_inputs = X_train.reshape(train_rows, train_cols)
        train_targets = y_train.reshape(y_train.shape[0], y_train.shape[1])
        regressor.fit(train_inputs, train_targets)

        preds = regressor.predict(X_valid.reshape(valid_rows, valid_cols))
        forecast_data = regressor.predict(forecast_data_input.reshape(1, -1))

        # Replace non-positive forecast entries with 0.
        floored_rows = [row if row > 0 else 0 for row in forecast_data.T]
        forecast_data_df = pd.DataFrame(floored_rows, index=future_dates)

        print_pretty_prediction(
            forecast_data_df[0], df_stock["Adj Close"].values[-1]
        )
        plot_title = f"KNN Model with {ns_parser.n_neighbors} Neighbors on {s_ticker}"
        plot_data_predictions(
            df_stock,
            preds,
            y_valid,
            y_dates_valid,
            scaler,
            plot_title,
            forecast_data_df,
            1,
        )
        print("")

    except Exception as e:
        # Command-level boundary: report the failure rather than crash the caller.
        print(e)
        print("")
Пример #6
0
def display_rnn(
    dataset: str,
    data: Union[pd.Series, pd.DataFrame],
    n_input_days: int,
    n_predict_days: int,
    learning_rate: float,
    epochs: int,
    batch_size: int,
    test_size: float,
    n_loops: int,
    no_shuffle: bool,
):
    """Train the RNN model, then print and plot its forecast.

    Parameters
    ----------
    dataset : str
        Dataset name, used in the plot title
    data : Union[pd.Series, pd.DataFrame]
        Data to feed to model
    n_input_days : int
        Number of inputs to train
    n_predict_days : int
        Number of outputs to predict
    learning_rate : float
        Learning rate forwarded to the model
    epochs : int
        Number of training epochs
    batch_size : int
        Training batch size
    test_size : float
        Size of test set
    n_loops : int
        Number of loops to perform for model; when greater than 1 a
        "Median" column is added to the forecast and used for the summary
    no_shuffle : bool
        Forwarded unchanged to ``neural_networks_model.rnn_model``
    """
    model_output = neural_networks_model.rnn_model(
        data,
        n_input_days,
        n_predict_days,
        learning_rate,
        epochs,
        batch_size,
        test_size,
        n_loops,
        no_shuffle,
    )
    forecast_data_df, preds, y_valid, y_dates_valid, scaler = model_output

    # Several loops are summarised by their row-wise median; a single loop
    # is reported directly from column 0.
    if n_loops > 1:
        forecast_data_df["Median"] = forecast_data_df.median(axis=1)
        summary_column = "Median"
    else:
        summary_column = 0
    print_pretty_prediction(forecast_data_df[summary_column], data.values[-1])

    plot_data_predictions(
        data,
        np.median(preds, axis=0),
        y_valid,
        y_dates_valid,
        scaler,
        f"RNN Model on {dataset}",
        forecast_data_df,
        n_loops,
    )
    print("")