示例#1
0
def display_k_nearest_neighbors(
    ticker: str,
    data: Union[pd.DataFrame, pd.Series],
    n_neighbors: int,
    n_input_days: int,
    n_predict_days: int,
    test_size: float,
    end_date: str = "",
    no_shuffle: bool = True,
):
    """Run the KNN model on ``data``, then print and plot its forecast.

    Parameters
    ----------
    ticker : str
        Name of the instrument; only used in the plot title
    data : Union[pd.DataFrame, pd.Series]
        Data handed to the model; its last value is passed to the
        prediction printer
    n_neighbors : int
        Number of neighbors for knn
    n_input_days : int
        Length of input sequences
    n_predict_days : int
        Number of days to predict
    test_size : float
        Fraction of data for testing
    end_date : str, optional
        End date for backtesting, by default ""
    no_shuffle : bool, optional
        Forwarded unchanged to the model helper, by default True
        (presumably disables random shuffling when True -- confirm
        against ``knn_model.get_knn_model_data``)
    """
    model_output = knn_model.get_knn_model_data(
        data, n_input_days, n_predict_days, n_neighbors, test_size, end_date, no_shuffle
    )
    forecast, predictions, valid_targets, valid_dates, fitted_scaler = model_output

    # An empty forecast frame is treated as a failed preparation/prediction run.
    if forecast.empty:
        print("Issue performing data prep and prediction")
        return

    print_pretty_prediction(forecast[0], data.values[-1])

    plot_title = f"KNN Model with {n_neighbors} Neighbors on {ticker}"
    plot_data_predictions(
        data,
        predictions,
        valid_targets,
        valid_dates,
        fitted_scaler,
        plot_title,
        forecast,
        1,  # a single model run, so no loop aggregation
    )
    print("")
示例#2
0
def display_exponential_smoothing(
    ticker: str,
    values: Union[pd.DataFrame, pd.Series],
    n_predict: int,
    trend: str = "N",
    seasonal: str = "N",
    seasonal_periods: int = 5,
    s_end_date: str = "",
    export: str = "",
    time_res: str = "",
):
    """Fit an exponential smoothing model, then print and plot its forecast.

    When ``s_end_date`` is given the function runs in backtesting mode: the
    model is fitted on the data up to that date and the forecast is compared
    (in a second figure and in a printed table) with the values that
    actually followed.

    Parameters
    ----------
    ticker : str
        Name of the dataset being smoothed; only used in plot titles
    values : Union[pd.DataFrame, pd.Series]
        Raw data, indexed by date
    n_predict : int
        Number of periods to predict
    trend : str, optional
        Trend selector forwarded to the model helper, by default "N"
    seasonal : str, optional
        Seasonal selector forwarded to the model helper, by default "N"
    seasonal_periods : int, optional
        Number of seasonal periods, by default 5
    s_end_date : str, optional
        End date for backtesting, by default "" (no backtesting)
    export : str, optional
        Format to export data, by default ""
    time_res : str
        Pandas frequency string used to build the forecast dates; when empty
        the next stock market days are used instead
    """
    backtesting = bool(s_end_date)

    if backtesting:
        if time_res:
            future_index = pd.date_range(
                s_end_date, periods=n_predict + 1, freq=time_res
            )[1:]
        else:
            future_index = get_next_stock_market_days(
                last_stock_day=s_end_date, n_next_days=n_predict
            )

        if future_index[-1] > datetime.datetime.now():
            console.print(
                "Backtesting not allowed, since End Date + Prediction days is in the future\n"
            )
            return

        # Hold out the observations that followed the end date, then truncate
        # the training data at the end date.
        df_future = values[future_index[0] : future_index[-1]]
        values = values[:s_end_date]  # type: ignore

    # Get ETS model
    model, title, forecast = ets_model.get_exponential_smoothing_model(
        values, trend, seasonal, seasonal_periods, n_predict
    )

    if not forecast:
        console.print("No forecast made.  Model did not converge.\n")
        return

    if np.isnan(forecast).any():
        console.print("Model predicted NaN values.  Runtime Error.\n")
        return

    last_date = values.index[-1]
    last_value = values.values[-1]

    if time_res:
        l_pred_days = pd.date_range(
            last_date, periods=n_predict + 1, freq=time_res
        )[1:]
    else:
        l_pred_days = get_next_stock_market_days(
            last_stock_day=last_date,
            n_next_days=n_predict,
        )

    df_pred = pd.Series(forecast, index=l_pred_days, name="Price")

    console.print(f"\n{title}")
    console.print("\nFit model parameters:")
    for key, value in model.params.items():
        console.print(f"{key} {' '*(18-len(key))}: {value}")

    console.print("\nAssess fit model:")
    console.print(f"AIC: {round(model.aic, 2)}")
    console.print(f"BIC: {round(model.bic, 2)}")
    console.print(f"SSE: {round(model.sse, 2)}\n")

    # Plotting: history, forecast and (when backtesting) the realised values.
    fig, ax = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    ax.plot(values.index, values.values, lw=2)
    if backtesting:
        ax.set_title(f"BACKTESTING: {title} on {ticker}")
    else:
        ax.set_title(f"{title} on {ticker}")

    ax.set_xlim(
        values.index[0],
        get_next_stock_market_days(df_pred.index[-1], 1)[-1],
    )
    ax.set_xlabel("Time")
    ax.set_ylabel("Share Price ($)")
    # Grid visibility is passed positionally: the ``b=`` keyword was renamed
    # to ``visible`` in Matplotlib 3.5, positional works on every release.
    ax.grid(True, which="major", color="#666666", linestyle="-")
    ax.minorticks_on()
    ax.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
    # Dashed connector between the last observation and the first forecast.
    ax.plot(
        [last_date, df_pred.index[0]],
        [last_value, df_pred.values[0]],
        lw=1,
        c="tab:green",
        linestyle="--",
    )
    ax.plot(df_pred.index, df_pred, lw=2, c="tab:green")
    ax.axvspan(
        last_date,
        df_pred.index[-1],
        facecolor="tab:orange",
        alpha=0.2,
    )
    ymin, ymax = ax.get_ylim()
    ax.vlines(
        last_date,
        ymin,
        ymax,
        linewidth=1,
        linestyle="--",
        color="k",
    )
    date_fmt = mdates.DateFormatter("%m/%d/%Y")
    ax.xaxis.set_major_formatter(date_fmt)
    ax.tick_params(axis="x", labelrotation=45)

    if backtesting:
        ax.plot(
            df_future.index,
            df_future,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax.plot(
            [last_date, df_future.index[0]],
            [last_value, df_future.values[0]],
            lw=1,
            c="tab:blue",
            linestyle="--",
        )

    if gtff.USE_ION:
        plt.ion()

    fig.tight_layout()
    plt.show()

    if backtesting:
        date_fmt = mdates.DateFormatter("%m-%d")
        fig, (ax0, ax1) = plt.subplots(1, 2, figsize=plot_autoscale(), dpi=PLOT_DPI)
        zoom_end = df_pred.index[-1] + datetime.timedelta(days=1)

        # Left panel: realised prices against predicted prices.
        ax0.plot(
            df_future.index,
            df_future,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.plot(df_pred.index, df_pred, lw=2, c="green")
        ax0.scatter(
            df_future.index,
            df_future,
            c="tab:blue",
            lw=3,
        )
        ax0.plot(
            [last_date, df_future.index[0]],
            [last_value, df_future.values[0]],
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.scatter(df_pred.index, df_pred, c="green", lw=3)
        ax0.plot(
            [last_date, df_pred.index[0]],
            [last_value, df_pred.values[0]],
            lw=2,
            c="green",
            ls="--",
        )
        ax0.set_title("BACKTESTING: Prices")
        ax0.set_xlim(last_date, zoom_end)
        ax0.set_ylabel("Share Price ($)")
        ax0.grid(True, which="major", color="#666666", linestyle="-")
        ax0.legend(["Real data", "Prediction data"])

        # Right panel: relative prediction error in percent.
        pct_error = 100 * (df_pred.values - df_future.values) / df_future.values
        ax1.axhline(y=0, color="k", linestyle="--", linewidth=2)
        ax1.plot(df_future.index, pct_error, lw=2, c="red")
        ax1.scatter(df_future.index, pct_error, c="red", lw=5)
        ax1.set_title("BACKTESTING: % Error")
        ax1.plot(
            [last_date, df_future.index[0]],
            [
                0,
                100 * (df_pred.values[0] - df_future.values[0]) /
                df_future.values[0],
            ],
            lw=2,
            ls="--",
            c="red",
        )
        ax1.set_xlim(last_date, zoom_end)
        ax1.set_xlabel("Time")
        ax1.set_ylabel("Prediction Error (%)")
        ax1.grid(True, which="major", color="#666666", linestyle="-")
        # NOTE(review): these labels are attached to the zero line and the
        # error curve, which they do not describe -- kept as in the original.
        ax1.legend(["Real data", "Prediction data"])

        for panel in (ax0, ax1):
            panel.xaxis.set_major_formatter(date_fmt)
            panel.tick_params(axis="x", labelrotation=45)

        if gtff.USE_ION:
            plt.ion()
        fig.tight_layout()
        plt.show()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future

        if gtff.USE_COLOR:
            patch_pandas_text_adjustment()

            console.print("Time         Real [$]  x  Prediction [$]")
            console.print(
                df_pred.apply(price_prediction_backtesting_color, axis=1).to_string()
            )
        else:
            console.print(df_pred[["Real", "Prediction"]].round(2).to_string())

        console.print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, last_value)
    export_data(export, os.path.dirname(os.path.abspath(__file__)), "ets")

    console.print("")
示例#3
0
def arima(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """Parse the ``arima`` command and run an ARIMA forecast on a stock.

    With ``-o`` a model of the given order is fitted, otherwise the order is
    searched automatically with ``pmdarima.auto_arima``.  With ``-e`` the
    command runs in backtesting mode: the model is fitted on data up to the
    end date and the forecast is compared with the prices that followed.
    Any exception raised after argument setup is printed, not propagated.

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        Ticker, used in plot titles
    df_stock: pd.DataFrame
        Dataframe of prices; the "Adj Close" column is the modelled series
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="arima",
        description="""
            In statistics and econometrics, and in particular in time series analysis, an
            autoregressive integrated moving average (ARIMA) model is a generalization of an
            autoregressive moving average (ARMA) model. Both of these models are fitted to time
            series data either to better understand the data or to predict future points in the
            series (forecasting). ARIMA(p,d,q) where parameters p, d, and q are non-negative
            integers, p is the order (number of time lags) of the autoregressive model, d is the
            degree of differencing (the number of times the data have had past values subtracted),
            and q is the order of the moving-average model.
        """,
    )

    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i",
        "--ic",
        action="store",
        dest="s_ic",
        type=str,
        default="aic",
        choices=["aic", "aicc", "bic", "hqic", "oob"],
        help="information criteria.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store_true",
        default=False,
        dest="b_seasonal",
        help="Use weekly seasonal data.",
    )
    parser.add_argument(
        "-o",
        "--order",
        action="store",
        dest="s_order",
        type=str,
        help="arima model order (p,d,q) in format: p,d,q.",
    )
    parser.add_argument(
        "-r",
        "--results",
        action="store_true",
        dest="b_results",
        default=False,
        help="results about ARIMA summary flag.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        n_days = ns_parser.n_days
        backtesting = bool(ns_parser.s_end_date)

        # BACKTESTING: validate the end date and split off the hold-out data.
        if backtesting:

            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            if (
                ns_parser.s_end_date
                < get_next_stock_market_days(
                    last_stock_day=df_stock.index[0], n_next_days=5 + n_days
                )[-1]
            ):
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date, n_next_days=n_days
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            df_future = df_stock[future_index[0] : future_index[-1]]
            df_stock = df_stock[: ns_parser.s_end_date]

        closes = df_stock["Adj Close"]
        last_date = df_stock.index[-1]
        last_close = closes.values[-1]

        # Machine Learning model
        if ns_parser.s_order:
            t_order = tuple(int(term) for term in ns_parser.s_order.split(","))
            model = ARIMA(closes.values, order=t_order).fit()
            # NOTE(review): the first out-of-sample step is usually index
            # len(series); starting at len + 1 may skip it -- confirm against
            # the ARIMA implementation imported by this module.
            l_predictions = model.predict(
                start=len(closes) + 1,
                end=len(closes) + n_days,
            )
            shown_order = t_order
        else:
            # The keyword is ``information_criterion``; the previous spelling
            # ``information_criteria`` was swallowed by ``**fit_args`` so the
            # user's -i choice never reached the model selection.
            model = pmdarima.auto_arima(
                closes.values,
                error_action="ignore",
                seasonal=ns_parser.b_seasonal,
                m=5 if ns_parser.b_seasonal else 1,  # 1 is auto_arima's default
                information_criterion=ns_parser.s_ic,
            )
            # Negative forecasts are clamped to zero.
            l_predictions = [
                i if i > 0 else 0 for i in model.predict(n_periods=n_days)
            ]
            shown_order = model.order

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=closes.index[-1],
            n_next_days=n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        if ns_parser.b_results:
            print(model.summary())
            print("")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, closes, lw=2)
        title_prefix = "BACKTESTING: " if backtesting else ""
        plt.title(
            f"{title_prefix}ARIMA {shown_order} on {s_ticker} - {n_days} days prediction"
        )
        plt.xlim(
            df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1]
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # Grid visibility is passed positionally: the ``b=`` keyword was
        # renamed to ``visible`` in Matplotlib 3.5.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        plt.plot(
            [last_date, df_pred.index[0]],
            [last_close, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(last_date, df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(last_date, ymin, ymax, linewidth=1, linestyle="--", color="k")

        # BACKTESTING
        if backtesting:
            plt.plot(
                df_future.index,
                df_future["Adj Close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [last_date, df_future.index[0]],
                [last_close, df_future["Adj Close"].values[0]],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if backtesting:
            real_closes = df_future["Adj Close"]
            zoom_end = df_pred.index[-1] + datetime.timedelta(days=1)

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)

            # Upper panel: realised prices against predicted prices.
            plt.subplot(211)
            plt.plot(
                df_future.index,
                real_closes,
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, real_closes, c="tab:blue", lw=3)
            plt.plot(
                [last_date, df_future.index[0]],
                [last_close, real_closes.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_date, df_pred.index[0]],
                [last_close, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(last_date, zoom_end)
            plt.xticks([last_date, zoom_end], visible=True)
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            # Lower panel: relative prediction error in percent.
            pct_error = 100 * (df_pred.values - real_closes.values) / real_closes.values
            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(df_future.index, pct_error, lw=2, c="red")
            plt.scatter(df_future.index, pct_error, c="red", lw=5)
            plt.title("BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [last_date, df_future.index[0]],
                [
                    0,
                    100
                    * (df_pred.values[0] - real_closes.values[0])
                    / real_closes.values[0],
                ],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(last_date, zoom_end)
            plt.xticks([last_date, zoom_end], visible=True)
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = real_closes

            if gtff.USE_COLOR:

                patch_pandas_text_adjustment()

                print("Time         Real [$]  x  Prediction [$]")
                print(
                    df_pred.apply(
                        price_prediction_backtesting_color, axis=1
                    ).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, last_close)
        print("")

    except Exception as e:
        # Command boundary: report the problem instead of crashing the caller.
        print(e, "\n")
def display_rnn(
    dataset: str,
    data: Union[pd.Series, pd.DataFrame],
    n_input_days: int,
    n_predict_days: int,
    learning_rate: float,
    epochs: int,
    batch_size: int,
    test_size: float,
    n_loops: int,
    no_shuffle: bool,
    time_res: str = "",
):
    """Train the RNN model on ``data``, then print and plot its forecast.

    Parameters
    ----------
    dataset : str
        Name of the dataset; only used in the plot title
    data : Union[pd.Series, pd.DataFrame]
        Data to feed to model
    n_input_days : int
        Number of inputs to train
    n_predict_days : int
        Number of outputs to predict
    learning_rate : float
        Learning rate forwarded to the model
    epochs : int
        Number of training epochs
    batch_size : int
        Training batch size
    test_size : float
        Size of test set
    n_loops : int
        Number of loops to perform for model; with more than one loop the
        row-wise median of the forecasts is what gets printed
    no_shuffle : bool
        Flag to not randomly shuffle data
    time_res : str
        Pandas frequency string; when set, the forecast is re-indexed on
        dates generated from the last date of ``data``
    """
    model_output = neural_networks_model.rnn_model(
        data,
        n_input_days,
        n_predict_days,
        learning_rate,
        epochs,
        batch_size,
        test_size,
        n_loops,
        no_shuffle,
    )
    forecast, loop_preds, valid_targets, valid_dates, fitted_scaler = model_output

    if time_res:
        # One extra period is generated so the last known date can be dropped.
        stamps = pd.date_range(
            data.index[-1], periods=n_predict_days + 1, freq=time_res
        )
        forecast.index = stamps[1:]

    # Pick the column shown to the user: the median across loops, or the
    # single run stored in column 0.
    if n_loops > 1:
        forecast["Median"] = forecast.median(axis=1)
        shown_column = "Median"
    else:
        shown_column = 0
    print_pretty_prediction(forecast[shown_column], data.values[-1])

    plot_title = f"RNN Model on {dataset}"
    plot_data_predictions(
        data,
        np.median(loop_preds, axis=0),
        valid_targets,
        valid_dates,
        fitted_scaler,
        plot_title,
        forecast,
        n_loops,
        time_res,
    )
    console.print("")
示例#5
0
def display_k_nearest_neighbors(
    ticker: str,
    data: Union[pd.DataFrame, pd.Series],
    n_neighbors: int,
    n_input_days: int,
    n_predict_days: int,
    test_size: float,
    end_date: str = "",
    no_shuffle: bool = True,
    time_res: str = "",
    external_axes: Optional[List[plt.Axes]] = None,
):
    """Run the KNN model on ``data``, then print and plot its forecast.

    Parameters
    ----------
    ticker : str
        Name of the instrument; only used in the plot title
    data : Union[pd.DataFrame, pd.Series]
        Data to use for ML
    n_neighbors : int
        Number of neighbors for knn
    n_input_days : int
        Length of input sequences
    n_predict_days : int
        Number of days to predict
    test_size : float
        Fraction of data for testing
    end_date : str, optional
        End date for backtesting, by default ""
    no_shuffle : bool, optional
        Forwarded unchanged to the model helper, by default True
        (presumably disables random shuffling when True -- confirm
        against ``knn_model.get_knn_model_data``)
    time_res : str
        Pandas frequency string; when set, the forecast is re-indexed on
        dates generated from the last date of ``data``
    external_axes : Optional[List[plt.Axes]], optional
        External axes (1 axis is expected in the list), by default None
    """
    model_output = knn_model.get_knn_model_data(
        data, n_input_days, n_predict_days, n_neighbors, test_size, end_date, no_shuffle
    )
    forecast, predictions, valid_targets, valid_dates, fitted_scaler = model_output

    # An empty forecast frame is treated as a failed preparation/prediction run.
    if forecast.empty:
        console.print("Issue performing data prep and prediction")
        return

    if time_res:
        # One extra period is generated so the last known date can be dropped.
        stamps = pd.date_range(
            data.index[-1], periods=n_predict_days + 1, freq=time_res
        )
        forecast.index = stamps[1:]

    print_pretty_prediction(forecast[0], data.values[-1])

    plot_kwargs = dict(
        data=data,
        preds=predictions,
        y_valid=valid_targets,
        y_dates_valid=valid_dates,
        scaler=fitted_scaler,
        title=f"KNN Model with {n_neighbors} Neighbors on {ticker}",
        forecast_data=forecast,
        n_loops=1,
        time_str=time_res,
        external_axes=external_axes,
    )
    plot_data_predictions(**plot_kwargs)
    console.print("")
def display_arima(
    dataset: str,
    values: Union[pd.DataFrame, pd.Series],
    arima_order: str,
    n_predict: int,
    seasonal: bool,
    ic: str,
    results: bool,
    s_end_date: str = "",
    export: str = "",
):
    """Fit an ARIMA model, then print and plot its forecast.

    When ``s_end_date`` is given the function runs in backtesting mode: the
    model is fitted on the data up to that date and the forecast is compared
    (in a second figure and in a printed table) with the values that
    actually followed.

    Parameters
    ----------
    dataset : str
        String indicating dataset (for plot title)
    values : Union[pd.DataFrame, pd.Series]
        Data to fit, indexed by date
    arima_order : str
        Comma-separated integers giving the ARIMA order (conventionally
        "p,d,q"); when empty the order reported by the fitted model is shown
    n_predict : int
        Days to predict
    seasonal : bool
        Flag to use seasonal model
    ic : str
        Information Criteria for model evaluation
    results : bool
        Flag to display model summary
    s_end_date : str, optional
        Specified end date for backtesting comparisons
    export : str, optional
        Export format forwarded to ``export_data``
    """
    backtesting = bool(s_end_date)

    # Parsed up front so a malformed order fails before any model is fitted.
    if arima_order:
        t_order = tuple(int(term) for term in arima_order.split(","))

    if backtesting:
        future_index = get_next_stock_market_days(
            last_stock_day=s_end_date, n_next_days=n_predict
        )

        if future_index[-1] > datetime.datetime.now():
            print(
                "Backtesting not allowed, since End Date + Prediction days is in the future\n"
            )
            return

        # Hold out the observations that followed the end date, then truncate
        # the training data at the end date.
        df_future = values[future_index[0] : future_index[-1]]
        values = values[:s_end_date]  # type: ignore

    l_predictions, model = arima_model.get_arima_model(
        values, arima_order, n_predict, seasonal, ic
    )

    last_date = values.index[-1]
    last_value = values.values[-1]

    # Prediction data
    l_pred_days = get_next_stock_market_days(
        last_stock_day=last_date,
        n_next_days=n_predict,
    )
    df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

    if results:
        print(model.summary())
        print("")

    # Plotting
    fig, ax = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    ax.plot(values.index, values, lw=2)

    # pylint:disable=no-member
    shown_order = t_order if arima_order else model.order
    title_prefix = "BACKTESTING: " if backtesting else ""
    # Always set the title on ``ax`` (one branch previously used ``plt.title``).
    ax.set_title(
        f"{title_prefix}ARIMA {shown_order} on {dataset} - {n_predict} days prediction"
    )
    ax.set_xlim(values.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1])
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    # Grid visibility is passed positionally: the ``b=`` keyword was renamed
    # to ``visible`` in Matplotlib 3.5, positional works on every release.
    ax.grid(True, which="major", color="#666666", linestyle="-")
    ax.minorticks_on()
    ax.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
    # Dashed connector between the last observation and the first forecast.
    ax.plot(
        [last_date, df_pred.index[0]],
        [last_value, df_pred.values[0]],
        lw=1,
        c="tab:green",
        linestyle="--",
    )
    ax.plot(df_pred.index, df_pred, lw=2, c="tab:green")
    ax.axvspan(last_date, df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
    ymin, ymax = ax.get_ylim()
    ax.vlines(last_date, ymin, ymax, linewidth=1, linestyle="--", color="k")

    # BACKTESTING
    if backtesting:
        ax.plot(
            df_future.index,
            df_future.values,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax.plot(
            [last_date, df_future.index[0]],
            [last_value, df_future.values[0]],
            lw=1,
            c="tab:blue",
            linestyle="--",
        )

    fig.tight_layout()
    if gtff.USE_ION:
        plt.ion()

    plt.show()

    # BACKTESTING
    if backtesting:
        fig, (ax0, ax1) = plt.subplots(1, 2, figsize=plot_autoscale(), dpi=PLOT_DPI)
        zoom_end = df_pred.index[-1] + datetime.timedelta(days=1)

        # Left panel: realised values against predicted values.
        ax0.plot(
            df_future.index,
            df_future.values,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.plot(df_pred.index, df_pred, lw=2, c="green")
        ax0.scatter(df_future.index, df_future, c="tab:blue", lw=3)
        ax0.plot(
            [last_date, df_future.index[0]],
            [last_value, df_future.values[0]],
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.scatter(df_pred.index, df_pred, c="green", lw=3)
        ax0.plot(
            [last_date, df_pred.index[0]],
            [last_value, df_pred.values[0]],
            lw=2,
            c="green",
            ls="--",
        )
        ax0.set_title("BACKTESTING: Real data Prediction")
        ax0.set_xlim(last_date, zoom_end)
        ax0.set_xticks([last_date, zoom_end])
        ax0.set_ylabel("Value")
        ax0.grid(True, which="major", color="#666666", linestyle="-")
        ax0.minorticks_on()
        ax0.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        ax0.legend(["Real data", "Prediction data"])
        ax0.set_xticks([])

        # Right panel: relative prediction error in percent.
        pct_error = 100 * (df_pred.values - df_future.values) / df_future.values
        ax1.axhline(y=0, color="k", linestyle="--", linewidth=2)
        ax1.plot(df_future.index, pct_error, lw=2, c="red")
        ax1.scatter(df_future.index, pct_error, c="red", lw=5)
        ax1.set_title("BACKTESTING: Error between Real data and Prediction [%]")
        ax1.plot(
            [last_date, df_future.index[0]],
            [
                0,
                100 * (df_pred.values[0] - df_future.values[0]) /
                df_future.values[0],
            ],
            lw=2,
            ls="--",
            c="red",
        )
        ax1.set_xlim(last_date, zoom_end)
        ax1.set_xticks([last_date, zoom_end])
        ax1.set_xlabel("Time")
        ax1.set_ylabel("Prediction Error (%)")
        ax1.grid(True, which="major", color="#666666", linestyle="-")
        ax1.minorticks_on()
        ax1.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        ax1.legend(["Real data", "Prediction data"])
        fig.tight_layout()
        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future.values

        if gtff.USE_TABULATE_DF:
            # The table is the same with or without colour.  The colour-off
            # path used to read a non-existent "Predicted" column (KeyError).
            df_pred["Real"] = df_pred["Real"].astype(float)
            df_pred["Prediction"] = df_pred["Prediction"].astype(float)
            df_pred["Dif"] = 100 * (df_pred.Prediction - df_pred.Real) / df_pred.Real
            print(
                tabulate(
                    df_pred,
                    headers=["Date", "Predicted", "Actual", "% Difference"],
                    showindex=True,
                    floatfmt=".2f",
                    tablefmt="fancy_grid",
                )
            )
        elif gtff.USE_COLOR:
            patch_pandas_text_adjustment()
            print("Time         Real [$]  x  Prediction [$]")
            print(
                df_pred.apply(price_prediction_backtesting_color, axis=1).to_string()
            )
        else:
            print(df_pred[["Real", "Prediction"]].round(2).to_string())

        print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, last_value)
    export_data(export, os.path.dirname(os.path.abspath(__file__)), "arima")
    print("")
示例#7
0
def display_regression(
    dataset: str,
    values: Union[pd.Series, pd.DataFrame],
    poly_order: int,
    n_input: int,
    n_predict: int,
    n_jumps: int,
    s_end_date: str = "",
    export: str = "",
    time_res: str = "",
):
    """Display predictions for regression models

    Obtains a forecast from ``regression_model.get_regression_model``, plots
    the history together with the forecast and prints the predicted values.
    When ``s_end_date`` is given, the observations following that date are
    held out, drawn next to the forecast, and a second figure plus a printed
    table compare prediction and real data.

    Parameters
    ----------
    dataset : str
        Title for data
    values : Union[pd.Series, pd.DataFrame]
        Data to fit
    poly_order : int
        Order of polynomial to fit
    n_input : int
        Length of input sequence
    n_predict : int
        Length of prediction sequence
    n_jumps : int
        Number of jumps in data
    s_end_date : str, optional
        Last date used for fitting; enables backtesting when non-empty
    export : str, optional
        Export format, forwarded to ``export_data``
    time_res : str
        Resolution for data (used as ``freq`` of ``pd.date_range``), allowing
        for predicting outside of standard market days
    """
    # BACKTESTING: keep the observations that follow s_end_date as ground truth
    if s_end_date:
        if not time_res:
            future_index = get_next_stock_market_days(
                last_stock_day=s_end_date, n_next_days=n_predict
            )
        else:
            # date_range starts at s_end_date itself, hence the dropped first stamp
            future_index = pd.date_range(
                s_end_date, periods=n_predict + 1, freq=time_res
            )[1:]

        df_future = values[future_index[0] : future_index[-1]]  # noqa: E203
        values = values[:s_end_date]  # type: ignore

    l_predictions, _ = regression_model.get_regression_model(
        values, poly_order, n_input, n_predict, n_jumps
    )

    # Prediction data
    if not time_res:
        l_pred_days = get_next_stock_market_days(
            last_stock_day=values.index[-1],
            n_next_days=n_predict,
        )
    else:
        l_pred_days = pd.date_range(
            values.index[-1], periods=n_predict + 1, freq=time_res
        )[1:]
    df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

    # Plotting
    fig, ax = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    ax.plot(values.index, values, lw=2)
    # BACKTESTING
    if s_end_date:
        ax.set_title(
            f"BACKTESTING: Regression (polynomial {poly_order}) on {dataset} - {n_predict} step prediction"
        )
    else:
        ax.set_title(
            f"Regression (polynomial {poly_order}) on {dataset} - {n_predict} step prediction"
        )
    ax.set_xlim(values.index[0], l_pred_days[-1])
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    # The on/off flag is passed positionally: the ``b`` keyword was renamed to
    # ``visible`` in Matplotlib and is no longer accepted by recent releases.
    ax.grid(True, which="major", color="#666666", linestyle="-")
    ax.minorticks_on()
    ax.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
    # Dashed connector between the last known value and the first prediction
    ax.plot(
        [values.index[-1], df_pred.index[0]],
        [values.values[-1], df_pred.values[0]],
        lw=1,
        c="tab:green",
        linestyle="--",
    )
    ax.plot(df_pred.index, df_pred, lw=2, c="tab:green")
    ax.axvspan(
        values.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2
    )
    _, _, ymin, ymax = plt.axis()
    ax.vlines(
        values.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k"
    )

    # BACKTESTING
    if s_end_date:
        ax.plot(
            df_future.index,
            df_future,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            lw=1,
            c="tab:blue",
            linestyle="--",
        )
    fig.tight_layout()
    if gtff.USE_ION:
        plt.ion()

    plt.show()

    export_data(export, os.path.dirname(os.path.abspath(__file__)), "regression")
    console.print("")

    # BACKTESTING
    if s_end_date:
        fig, (ax0, ax1) = plt.subplots(
            1, 2, figsize=plot_autoscale(), dpi=PLOT_DPI
        )

        # Left panel: real data against prediction
        ax0.plot(
            df_future.index,
            df_future,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.plot(df_pred.index, df_pred, lw=2, c="green")
        ax0.scatter(df_future.index, df_future, c="tab:blue", lw=3)
        ax0.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.scatter(df_pred.index, df_pred, c="green", lw=3)
        ax0.plot(
            [values.index[-1], df_pred.index[0]],
            [values.values[-1], df_pred.values[0]],
            lw=2,
            c="green",
            ls="--",
        )
        ax0.set_title("BACKTESTING: Real data vs Prediction")
        ax0.set_xlim(values.index[-1], df_pred.index[-1])
        ax0.set_ylabel("Value")
        ax0.grid(True, which="major", color="#666666", linestyle="-")
        ax0.minorticks_on()
        ax0.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        ax0.legend(["Real data", "Prediction data"])
        # The left panel ends up without x ticks; an earlier set_xticks call
        # with the two boundary dates was always overridden by this one.
        ax0.set_xticks([])

        # Right panel: percentage error of the prediction
        pct_error = 100 * (df_pred.values - df_future.values) / df_future.values
        ax1.axhline(y=0, color="k", linestyle="--", linewidth=2)
        ax1.plot(df_future.index, pct_error, lw=2, c="red")
        ax1.scatter(df_future.index, pct_error, c="red", lw=5)
        ax1.set_title("BACKTESTING: Error between Real data and Prediction [%]")
        # Connector from zero error at the last known date to the first error
        ax1.plot(
            [values.index[-1], df_future.index[0]],
            [
                0,
                100 * (df_pred.values[0] - df_future.values[0]) / df_future.values[0],
            ],
            lw=2,
            ls="--",
            c="red",
        )
        ax1.set_xlim(values.index[-1], df_pred.index[-1])
        ax1.set_xticks([values.index[-1], df_pred.index[-1]])
        ax1.set_xlabel("Time")
        ax1.set_ylabel("Prediction Error (%)")
        ax1.grid(True, which="major", color="#666666", linestyle="-")
        ax1.minorticks_on()
        ax1.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        ax1.legend(["Real data", "Prediction data"])
        fig.tight_layout()
        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future

        if gtff.USE_COLOR:

            patch_pandas_text_adjustment()

            console.print("Time         Real [$]  x  Prediction [$]")
            console.print(
                df_pred.apply(price_prediction_backtesting_color, axis=1).to_string()
            )
        else:
            console.print(df_pred[["Real", "Prediction"]].round(2).to_string())

        console.print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, values.values[-1])
    console.print("")
示例#8
0
def display_regression(
    dataset: str,
    values: Union[pd.Series, pd.DataFrame],
    poly_order: int,
    n_input: int,
    n_predict: int,
    n_jumps: int,
    s_end_date: str = "",
    export: str = "",
    time_res: str = "",
    external_axes: Optional[List[plt.Axes]] = None,
):
    """Display predictions for regression models

    Obtains a forecast from ``regression_model.get_regression_model``, plots
    the history together with the forecast and prints the predicted values.
    When ``s_end_date`` is given, the observations following that date are
    held out, drawn next to the forecast, and two further axes plus a printed
    table compare prediction and real data.

    Parameters
    ----------
    dataset : str
        Title for data
    values : Union[pd.Series, pd.DataFrame]
        Data to fit
    poly_order : int
        Order of polynomial to fit
    n_input : int
        Length of input sequence
    n_predict : int
        Length of prediction sequence
    n_jumps : int
        Number of jumps in data
    s_end_date : str, optional
        Last date used for fitting; enables backtesting when non-empty
    export : str, optional
        Export format, forwarded to ``export_data``
    time_res : str
        Resolution for data (used as ``freq`` of ``pd.date_range``), allowing
        for predicting outside of standard market days
    external_axes : Optional[List[plt.Axes]], optional
        External axes (1 axis is expected in the list, or 3 axes when
        backtesting), by default None
    """
    # BACKTESTING: keep the observations that follow s_end_date as ground truth
    if s_end_date:
        if not time_res:
            future_index = get_next_stock_market_days(
                last_stock_day=s_end_date, n_next_days=n_predict
            )
        else:
            # date_range starts at s_end_date itself, hence the dropped first stamp
            future_index = pd.date_range(
                s_end_date, periods=n_predict + 1, freq=time_res
            )[1:]

        df_future = values[future_index[0] : future_index[-1]]  # noqa: E203
        values = values[:s_end_date]  # type: ignore

    l_predictions, _ = regression_model.get_regression_model(
        list(values.values), poly_order, n_input, n_predict, n_jumps
    )

    # Prediction data
    if not time_res:
        l_pred_days = get_next_stock_market_days(
            last_stock_day=values.index[-1],
            n_next_days=n_predict,
        )
    else:
        l_pred_days = pd.date_range(
            values.index[-1], periods=n_predict + 1, freq=time_res
        )[1:]
    df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

    # Plotting

    # The forecast plot uses a single axis
    if external_axes is None:
        _, ax1 = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    else:
        expected_axes = 3 if s_end_date else 1
        if len(external_axes) != expected_axes:
            logger.error("Expected list of 1 axis or 3 axes when backtesting.")
            console.print(
                "[red]Expected list of 1 axis or 3 axes when backtesting.\n[/red]"
            )
            return
        ax1 = external_axes[0]

    ax1.plot(values.index, values)
    # BACKTESTING
    if s_end_date:
        ax1.set_title(
            f"BACKTESTING: Regression (polynomial {poly_order}) on {dataset} - {n_predict} step prediction",
            fontsize=12,
        )
    else:
        ax1.set_title(
            f"Regression (polynomial {poly_order}) on {dataset} - {n_predict} step prediction"
        )
    ax1.set_xlim(values.index[0], l_pred_days[-1])
    ax1.set_ylabel("Value")
    # Dashed connector between the last known value and the first prediction
    ax1.plot(
        [values.index[-1], df_pred.index[0]],
        [values.values[-1], df_pred.values[0]],
        color=theme.down_color,
        linestyle="--",
    )
    ax1.plot(df_pred.index, df_pred, color=theme.down_color)
    ax1.axvspan(values.index[-1], df_pred.index[-1], alpha=0.2)
    # Read the limits from ax1 itself: pyplot's "current axes" need not be ax1
    # when the caller supplies external axes.
    ymin, ymax = ax1.get_ylim()
    ax1.vlines(values.index[-1], ymin, ymax, linestyle="--")

    # BACKTESTING
    if s_end_date:
        ax1.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax1.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            color=theme.up_color,
            linestyle="--",
        )

    theme.style_primary_axis(ax1)

    if external_axes is None:
        theme.visualize_output()

    export_data(export, os.path.dirname(os.path.abspath(__file__)), "regression")
    console.print("")

    # BACKTESTING
    if s_end_date:
        # The comparison figure uses two stacked axes
        if external_axes is None:
            _, axes = plt.subplots(
                2, 1, sharex=True, figsize=plot_autoscale(), dpi=PLOT_DPI
            )
            (ax2, ax3) = axes
        else:
            # Length 3 was already validated before the first plot was drawn
            (_, ax2, ax3) = external_axes

        ax2.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax2.plot(df_pred.index, df_pred, color=theme.down_color, marker="o")
        ax2.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            color=theme.up_color,
            linestyle="--",
        )
        ax2.plot(
            [values.index[-1], df_pred.index[0]],
            [values.values[-1], df_pred.values[0]],
            color=theme.down_color,
            linestyle="--",
            marker="o",
        )
        ax2.set_title("BACKTESTING: Real data vs Prediction", fontsize=12)
        ax2.set_xlim(values.index[-1], df_pred.index[-1])
        ax2.set_ylabel("Value")
        ax2.legend(["Real data", "Prediction data"])

        ax3.axhline(y=0, linestyle="--", color=theme.up_color)
        ax3.plot(
            df_future.index,
            100 * (df_pred.values - df_future.values) / df_future.values,
            color=theme.down_color,
            marker="o",
        )
        ax3.set_title(
            "BACKTESTING: Error between Real data and Prediction [%]", fontsize=12
        )
        # Connector from zero error at the last known date to the first error
        ax3.plot(
            [values.index[-1], df_future.index[0]],
            [
                0,
                100 * (df_pred.values[0] - df_future.values[0]) / df_future.values[0],
            ],
            linestyle="--",
            color=theme.down_color,
        )
        ax3.set_xlim(values.index[-1], df_pred.index[-1])
        ax3.set_xlabel("Time")
        ax3.set_ylabel("Error (%)")
        ax3.legend(["Real data", "Prediction data"])

        theme.style_primary_axis(ax2)
        theme.style_primary_axis(ax3)

        if external_axes is None:
            theme.visualize_output()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future

        if rich_config.USE_COLOR:

            patch_pandas_text_adjustment()

            console.print("Time         Real [$]  x  Prediction [$]")
            console.print(
                df_pred.apply(
                    lambda_price_prediction_backtesting_color, axis=1
                ).to_string()
            )
        else:
            console.print(df_pred[["Real", "Prediction"]].round(2).to_string())

        console.print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, values.values[-1])
    console.print("")
示例#9
0
def display_arima(
    dataset: str,
    values: Union[pd.DataFrame, pd.Series],
    arima_order: str,
    n_predict: int,
    seasonal: bool,
    ic: str,
    results: bool,
    s_end_date: str = "",
    export: str = "",
    time_res: str = "",
    external_axes: Optional[List[plt.Axes]] = None,
):
    """View fit ARIMA model

    Obtains a forecast from ``arima_model.get_arima_model``, plots the history
    together with the forecast and prints the predicted values. When
    ``s_end_date`` is given, the observations following that date are held
    out, drawn next to the forecast, and two further axes plus a table compare
    prediction and real data.

    Parameters
    ----------
    dataset : str
        String indicating dataset (for plot title)
    values : Union[pd.DataFrame, pd.Series]
        Data to fit
    arima_order : str
        Comma-separated string of integer ARIMA params (e.g. "1,1,1"); when
        empty, the order reported by the fitted model is shown in the title
    n_predict : int
        Days to predict
    seasonal : bool
        Flag to use seasonal model
    ic : str
        Information Criteria for model evaluation
    results : bool
        Flag to display model summary
    s_end_date : str, optional
        Specified end date for backtesting comparisons
    export : str, optional
        Export format, forwarded to ``export_data``
    time_res : str
        Resolution for data (used as ``freq`` of ``pd.date_range``), allowing
        for predicting outside of standard market days
    external_axes : Optional[List[plt.Axes]], optional
        External axes (1 axis is expected in the list, or 3 axes when
        backtesting), by default None
    """

    if arima_order:
        t_order = tuple(int(term) for term in arima_order.split(","))
    if s_end_date:
        if not time_res:
            future_index = get_next_stock_market_days(
                last_stock_day=s_end_date, n_next_days=n_predict
            )
        else:
            # date_range starts at s_end_date itself, hence the dropped first stamp
            future_index = pd.date_range(
                s_end_date, periods=n_predict + 1, freq=time_res
            )[1:]

        if future_index[-1] > datetime.datetime.now():
            console.print(
                "Backtesting not allowed, since End Date + Prediction days is in the future\n"
            )
            return

        df_future = values[future_index[0] : future_index[-1]]  # noqa: E203
        values = values[:s_end_date]  # type: ignore

    l_predictions, model = arima_model.get_arima_model(
        values, arima_order, n_predict, seasonal, ic
    )

    # Prediction data
    if not time_res:
        l_pred_days = get_next_stock_market_days(
            last_stock_day=values.index[-1],
            n_next_days=n_predict,
        )
    else:
        l_pred_days = pd.date_range(
            values.index[-1], periods=n_predict + 1, freq=time_res
        )[1:]

    df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

    if results:
        console.print(model.summary())
        console.print("")

    # The forecast plot uses a single axis
    if external_axes is None:
        _, ax = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    else:
        expected_axes = 3 if s_end_date else 1
        if len(external_axes) != expected_axes:
            logger.error(
                "Expected list of 1 axis item or 3 axis items when backtesting"
            )
            console.print(
                "[red]Expected list of 1 axis item "
                + "or 3 axis items when backtesting.\n[/red]"
            )
            return
        ax = external_axes[0]

    ax.plot(values.index, values)

    # pylint:disable=no-member

    # Title shows the requested order, or the fitted model's order when none
    # was requested. It is always set on ``ax`` so that externally supplied
    # axes receive it too.
    order_label = t_order if arima_order else model.order
    title_prefix = "BACKTESTING: " if s_end_date else ""
    ax.set_title(
        f"{title_prefix}ARIMA {order_label} on {dataset} - {n_predict} step prediction"
    )
    ax.set_xlim(values.index[0], l_pred_days[-1])
    ax.set_ylabel("Value")
    # Dashed connector between the last known value and the first prediction
    ax.plot(
        [values.index[-1], df_pred.index[0]],
        [values.values[-1], df_pred.values[0]],
        color=theme.up_color,
        linestyle="--",
    )
    ax.plot(df_pred.index, df_pred, color=theme.up_color)
    ax.axvspan(values.index[-1], df_pred.index[-1], alpha=0.2)
    # Read the limits from ax itself: pyplot's "current axes" need not be ax
    # when the caller supplies external axes.
    ymin, ymax = ax.get_ylim()
    ax.vlines(values.index[-1], ymin, ymax, linestyle="--")

    # BACKTESTING
    if s_end_date:
        ax.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            color=theme.up_color,
            linestyle="--",
        )

    theme.style_primary_axis(ax)

    if external_axes is None:
        theme.visualize_output()

    # BACKTESTING
    if s_end_date:
        # The comparison figure uses two stacked axes
        if external_axes is None:
            _, axes = plt.subplots(
                2, 1, sharex=True, figsize=plot_autoscale(), dpi=PLOT_DPI
            )
            (ax2, ax3) = axes
        else:
            # Length 3 was already validated before the first plot was drawn
            (_, ax2, ax3) = external_axes

        ax2.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax2.plot(df_pred.index, df_pred)
        ax2.scatter(
            df_future.index,
            df_future,
            color=theme.up_color,
        )
        ax2.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            color=theme.up_color,
            linestyle="--",
        )
        ax2.scatter(df_pred.index, df_pred)
        ax2.plot(
            [values.index[-1], df_pred.index[0]],
            [values.values[-1], df_pred.values[0]],
            linestyle="--",
        )
        ax2.set_title("BACKTESTING: Values")
        ax2.set_xlim(
            values.index[-1],
            df_pred.index[-1] + datetime.timedelta(days=1),
        )
        ax2.set_ylabel("Value")
        ax2.legend(["Real data", "Prediction data"])
        theme.style_primary_axis(ax2)

        pct_error = 100 * (df_pred.values - df_future.values) / df_future.values
        ax3.axhline(y=0, linestyle="--")
        ax3.plot(df_future.index, pct_error, color=theme.down_color)
        ax3.scatter(df_future.index, pct_error, color=theme.down_color)
        ax3.set_title("BACKTESTING: % Error")
        # Connector from zero error at the last known date to the first error
        ax3.plot(
            [values.index[-1], df_future.index[0]],
            [
                0,
                100 * (df_pred.values[0] - df_future.values[0]) / df_future.values[0],
            ],
            ls="--",
            color=theme.down_color,
        )
        ax3.set_xlim(
            values.index[-1],
            df_pred.index[-1] + datetime.timedelta(days=1),
        )
        ax3.set_ylabel("Prediction Error (%)")
        theme.style_primary_axis(ax3)

        if external_axes is None:
            theme.visualize_output()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future.values

        df_pred["Real"] = df_pred["Real"].astype(float)
        df_pred["Prediction"] = df_pred["Prediction"].astype(float)
        df_pred["Dif"] = 100 * (df_pred.Prediction - df_pred.Real) / df_pred.Real
        # A former alternative branch read a non-existent "Predicted" column
        # (KeyError) and passed four headers for the three columns; the table
        # is now rendered the same way regardless of the colour setting.
        print_rich_table(
            df_pred,
            headers=["Predicted", "Actual", "% Difference"],
            index_name="Date",
            show_index=True,
            title="ARIMA Model",
        )

        console.print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, values.values[-1])
    export_data(export, os.path.dirname(os.path.abspath(__file__)), "arima")
    console.print("")
示例#10
0
def conv1d(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """
    Train a 1D Convolutional Neural Net (1D CNN)

    The network is built, trained and evaluated ``n_loops`` times; the
    validation predictions and the forecasts of all runs are collected,
    printed and plotted. Any exception is printed with its traceback, and
    ``restore_env`` is called on every exit path.

    Parameters
    ----------
    other_args:List[str]
        Argparse arguments
    s_ticker: str
        Stock ticker
    df_stock: pd.DataFrame
        Dataframe of stock prices
    """

    def _with_channel_axis(batch):
        # (sequences, length) -> (sequences, length, 1)
        return batch.reshape(batch.shape[0], batch.shape[1], 1)

    try:
        ns_parser = parse_args(
            prog="conv1d",
            description="""1D CNN.""",
            other_args=other_args,
        )
        if not ns_parser:
            return

        (
            X_train,
            X_valid,
            y_train,
            y_valid,
            _,
            _,
            _,
            y_dates_valid,
            forecast_data_input,
            dates_forecast_input,
            scaler,
            is_error,
        ) = prepare_scale_train_valid_test(df_stock["Adj Close"], ns_parser)
        if is_error:
            return

        print(
            f"Training on {X_train.shape[0]} sequences of length {X_train.shape[1]}.  Using {X_valid.shape[0]} sequences "
            f" of length {X_valid.shape[1]} for validation. Model will run {ns_parser.n_loops} loops"
        )

        future_dates = get_next_stock_market_days(
            dates_forecast_input[-1], n_next_days=ns_parser.n_days
        )

        # One slice per training run
        preds = np.zeros((ns_parser.n_loops, X_valid.shape[0], ns_parser.n_days))
        forecast_data = np.zeros((ns_parser.n_loops, ns_parser.n_days))

        for run in range(ns_parser.n_loops):
            # Build Neural Network model
            model = build_neural_network_model(
                cfg_nn_models.Convolutional,
                ns_parser.n_inputs,
                ns_parser.n_days,
            )

            optimizer_factory = optimizers[cfg_nn_models.Optimizer]
            model.compile(
                optimizer=optimizer_factory(learning_rate=ns_parser.lr),
                loss=cfg_nn_models.Loss,
            )

            model.fit(
                _with_channel_axis(X_train),
                y_train,
                epochs=ns_parser.n_epochs,
                verbose=True,
                batch_size=ns_parser.n_batch_size,
                validation_data=(_with_channel_axis(X_valid), y_valid),
                callbacks=[es],
            )

            valid_output = model.predict(_with_channel_axis(X_valid))
            preds[run] = valid_output.reshape(X_valid.shape[0], ns_parser.n_days)

            run_forecast = forecast(forecast_data_input, future_dates, model, scaler)
            forecast_data[run] = run_forecast.values.flat

        forecast_data_df = pd.DataFrame(forecast_data.T, index=future_dates)

        # With several runs the median forecast is reported, otherwise the only one
        if ns_parser.n_loops > 1:
            forecast_data_df["Median"] = forecast_data_df.median(axis=1)
            reported = forecast_data_df["Median"]
        else:
            reported = forecast_data_df[0]
        print_pretty_prediction(reported, df_stock["Adj Close"].values[-1])

        plot_data_predictions(
            df_stock,
            np.median(preds, axis=0),
            y_valid,
            y_dates_valid,
            scaler,
            f"Conv1D Model on {s_ticker}",
            forecast_data_df,
            ns_parser.n_loops,
        )
        print("")

    except Exception as e:
        print(e)
        traceback.print_exc()
        print("")

    finally:
        restore_env()
示例#11
0
def display_exponential_smoothing(
    ticker: str,
    values: Union[pd.DataFrame, pd.Series],
    n_predict: int,
    trend: str = "N",
    seasonal: str = "N",
    seasonal_periods: int = 5,
    s_end_date: str = "",
    export: str = "",
    time_res: str = "",
    external_axes: Optional[List[plt.Axes]] = None,
):
    """Perform exponential smoothing

    Obtains a model and forecast from
    ``ets_model.get_exponential_smoothing_model``, prints the fitted
    parameters and fit metrics, plots the history together with the forecast
    and prints the predicted values. When ``s_end_date`` is given, the
    observations following that date are held out, drawn next to the forecast,
    and two further axes plus a printed table compare prediction and real data.

    Parameters
    ----------
    ticker : str
        Dataset being smoothed
    values : Union[pd.DataFrame, pd.Series]
        Raw data
    n_predict : int
        Days to predict
    trend : str, optional
        Trend variable, by default "N"
    seasonal : str, optional
        Seasonal variable, by default "N"
    seasonal_periods : int, optional
        Number of seasonal periods, by default 5
    s_end_date : str, optional
        End date for backtesting, by default ""
    export : str, optional
        Format to export data, by default ""
    time_res : str
        Resolution for data (used as ``freq`` of ``pd.date_range``), allowing
        for predicting outside of standard market days
    external_axes : Optional[List[plt.Axes]], optional
        External axes (1 axis is expected in the list, or 3 axes when
        backtesting), by default None
    """
    if s_end_date:
        if not time_res:
            future_index = get_next_stock_market_days(
                last_stock_day=s_end_date, n_next_days=n_predict
            )
        else:
            # date_range starts at s_end_date itself, hence the dropped first stamp
            future_index = pd.date_range(
                s_end_date, periods=n_predict + 1, freq=time_res
            )[1:]

        if future_index[-1] > datetime.datetime.now():
            console.print(
                "Backtesting not allowed,"
                + " since End Date + Prediction days is in the future\n"
            )
            return

        df_future = values[future_index[0] : future_index[-1]]  # noqa: E203
        values = values[:s_end_date]  # type: ignore

    # Get ETS model
    model, title, forecast = ets_model.get_exponential_smoothing_model(
        values, trend, seasonal, seasonal_periods, n_predict
    )

    if not forecast:
        console.print("No forecast made.  Model did not converge.\n")
        return

    if np.isnan(forecast).any():
        console.print("Model predicted NaN values.  Runtime Error.\n")
        return

    if not time_res:
        l_pred_days = get_next_stock_market_days(
            last_stock_day=values.index[-1],
            n_next_days=n_predict,
        )
    else:
        l_pred_days = pd.date_range(
            values.index[-1], periods=n_predict + 1, freq=time_res
        )[1:]

    df_pred = pd.Series(forecast, index=l_pred_days, name="Price")

    console.print(f"\n{title}")
    console.print("\nFit model parameters:")
    for key, value in model.params.items():
        console.print(f"{key} {' '*(18-len(key))}: {value}")

    console.print("\nAssess fit model:")
    console.print(f"AIC: {round(model.aic, 2)}")
    console.print(f"BIC: {round(model.bic, 2)}")
    console.print(f"SSE: {round(model.sse, 2)}\n")

    # Plotting

    # The forecast plot uses a single axis
    if external_axes is None:
        _, ax1 = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    else:
        expected_axes = 3 if s_end_date else 1
        if len(external_axes) != expected_axes:
            console.print(
                "[red]Expected list of 1 axis item "
                + "or 3 axis items when backtesting.\n[/red]"
            )
            return
        ax1 = external_axes[0]

    ax1.plot(values.index, values.values)

    # BACKTESTING
    if s_end_date:
        ax1.set_title(f"BACKTESTING: {title} on {ticker}", fontsize=12)
    else:
        ax1.set_title(f"{title} on {ticker}", fontsize=12)

    ax1.set_xlim(
        values.index[0],
        get_next_stock_market_days(df_pred.index[-1], 1)[-1],
    )
    ax1.set_ylabel("Value")
    # Dashed connector between the last known value and the first prediction
    ax1.plot(
        [values.index[-1], df_pred.index[0]],
        [values.values[-1], df_pred.values[0]],
        color=theme.down_color,
        linestyle="--",
    )
    ax1.plot(df_pred.index, df_pred, color=theme.down_color)
    ax1.axvspan(
        values.index[-1],
        df_pred.index[-1],
        facecolor=theme.down_color,
        alpha=0.2,
    )
    # Read the limits from ax1 itself: pyplot's "current axes" need not be ax1
    # when the caller supplies external axes.
    ymin, ymax = ax1.get_ylim()
    ax1.vlines(
        values.index[-1],
        ymin,
        ymax,
        linestyle="--",
        color=theme.get_colors(reverse=True)[0],
    )

    # BACKTESTING
    if s_end_date:
        ax1.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax1.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            color=theme.up_color,
            linestyle="--",
        )

    theme.style_primary_axis(ax1)

    if external_axes is None:
        theme.visualize_output()

    # BACKTESTING
    if s_end_date:
        # The comparison figure uses two stacked axes
        if external_axes is None:
            _, axes = plt.subplots(
                2, 1, sharex=True, figsize=plot_autoscale(), dpi=PLOT_DPI
            )
            (ax2, ax3) = axes
        else:
            # Length 3 was already validated before the first plot was drawn
            (_, ax2, ax3) = external_axes

        ax2.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax2.plot(df_pred.index, df_pred)
        ax2.scatter(
            df_future.index,
            df_future,
            color=theme.up_color,
        )
        ax2.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            color=theme.up_color,
            linestyle="--",
        )
        ax2.scatter(df_pred.index, df_pred)
        ax2.plot(
            [values.index[-1], df_pred.index[0]],
            [values.values[-1], df_pred.values[0]],
            linestyle="--",
        )
        ax2.set_title("BACKTESTING: Values")
        ax2.set_xlim(
            values.index[-1],
            df_pred.index[-1] + datetime.timedelta(days=1),
        )
        ax2.set_ylabel("Value")
        ax2.legend(["Real data", "Prediction data"])
        theme.style_primary_axis(ax2)

        pct_error = 100 * (df_pred.values - df_future.values) / df_future.values
        ax3.axhline(y=0, linestyle="--")
        ax3.plot(df_future.index, pct_error, color=theme.down_color)
        ax3.scatter(df_future.index, pct_error, color=theme.down_color)
        ax3.set_title("BACKTESTING: % Error")
        # Connector from zero error at the last known date to the first error
        ax3.plot(
            [values.index[-1], df_future.index[0]],
            [
                0,
                100 * (df_pred.values[0] - df_future.values[0]) / df_future.values[0],
            ],
            ls="--",
            color=theme.down_color,
        )
        ax3.set_xlim(
            values.index[-1],
            df_pred.index[-1] + datetime.timedelta(days=1),
        )
        ax3.set_ylabel("Prediction Error (%)")
        theme.style_primary_axis(ax3)

        if external_axes is None:
            theme.visualize_output()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future

        if gtff.USE_COLOR:

            patch_pandas_text_adjustment()

            console.print("Time         Real [$]  x  Prediction [$]")
            console.print(
                df_pred.apply(
                    lambda_price_prediction_backtesting_color, axis=1
                ).to_string()
            )
        else:
            console.print(df_pred[["Real", "Prediction"]].round(2).to_string())

        console.print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, values.values[-1])
    export_data(export, os.path.dirname(os.path.abspath(__file__)), "ets")
示例#12
0
def k_nearest_neighbors(other_args: List[str], s_ticker: str,
                        df_stock: pd.DataFrame):
    """
    Fit a K-nearest-neighbours regressor on adjusted closes and show its forecast

    The ``knn`` command options are parsed from ``other_args``, the
    "Adj Close" column is turned into training/validation sequences by
    ``prepare_scale_train_valid_test``, and the fitted model's predictions
    are printed and plotted.

    Parameters
    ----------
    other_args: List[str]
        Command line tokens handed to the argument parser
    s_ticker: str
        Ticker, only used in the plot title
    df_stock: pd.DataFrame
        Dataframe of stock prices; must contain an "Adj Close" column

    Returns
    -------
    None
        Any exception raised after argument setup is printed, not propagated
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="knn",
        description="""
            K nearest neighbors is a simple algorithm that stores all
            available cases and predict the numerical target based on a similarity measure
            (e.g. distance functions).
        """,
    )

    # (flags, keyword options) for every command option, in registration order
    option_table = (
        (
            ("-i", "--input"),
            dict(
                action="store",
                dest="n_inputs",
                type=check_positive,
                default=40,
                help="number of days to use as input for prediction.",
            ),
        ),
        (
            ("-d", "--days"),
            dict(
                action="store",
                dest="n_days",
                type=check_positive,
                default=5,
                help="prediction days.",
            ),
        ),
        (
            ("-j", "--jumps"),
            dict(
                action="store",
                dest="n_jumps",
                type=check_positive,
                default=1,
                help="number of jumps in training data.",
            ),
        ),
        (
            ("-n", "--neighbors"),
            dict(
                action="store",
                dest="n_neighbors",
                type=check_positive,
                default=20,
                help="number of neighbors to use on the algorithm.",
            ),
        ),
        (
            ("-e", "--end"),
            dict(
                action="store",
                type=valid_date,
                dest="s_end_date",
                default=None,
                help="The end date (format YYYY-MM-DD) to select for testing",
            ),
        ),
        (
            ("-t", "--test_size"),
            dict(
                default=0.2,
                dest="valid_split",
                type=float,
                help="Percentage of data to validate in sample",
            ),
        ),
        (
            ("-p", "--pp"),
            dict(
                action="store",
                dest="s_preprocessing",
                default="none",
                choices=["normalization", "standardization", "minmax", "none"],
                help="pre-processing data.",
            ),
        ),
        (
            ("--no_shuffle",),
            dict(
                action="store_false",
                dest="no_shuffle",
                default=True,
                help="Specify if shuffling validation inputs.",
            ),
        ),
    )
    for flags, options in option_table:
        parser.add_argument(*flags, **options)

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        prepared = prepare_scale_train_valid_test(df_stock["Adj Close"],
                                                  ns_parser)
        # Twelve values come back; the three placeholders are not needed here
        (
            train_x,
            valid_x,
            train_y,
            valid_y,
            _,
            _,
            _,
            valid_dates,
            forecast_input,
            forecast_input_dates,
            scaler,
            failed,
        ) = prepared
        if failed:
            print("Error preparing data")
            return

        n_train, train_len = train_x.shape[0], train_x.shape[1]
        n_valid, valid_len = valid_x.shape[0], valid_x.shape[1]
        print(
            f"Training on {n_train} sequences of length {train_len}.  Using {n_valid} sequences "
            f" of length {valid_len} for validation")

        future_dates = get_next_stock_market_days(
            forecast_input_dates[-1], n_next_days=ns_parser.n_days)

        # Machine Learning model: the regressor is fed 2-D arrays
        regressor = neighbors.KNeighborsRegressor(
            n_neighbors=ns_parser.n_neighbors)
        regressor.fit(
            train_x.reshape(n_train, train_len),
            train_y.reshape(train_y.shape[0], train_y.shape[1]),
        )

        validation_preds = regressor.predict(
            valid_x.reshape(n_valid, valid_len))
        raw_forecast = regressor.predict(forecast_input.reshape(1, -1))

        # Non-positive forecast values are replaced by 0
        floored = [step if step > 0 else 0 for step in raw_forecast.T]
        forecast_frame = pd.DataFrame(floored, index=future_dates)

        print_pretty_prediction(forecast_frame[0],
                                df_stock["Adj Close"].values[-1])
        plot_data_predictions(
            df_stock,
            validation_preds,
            valid_y,
            valid_dates,
            scaler,
            f"KNN Model with {ns_parser.n_neighbors} Neighbors on {s_ticker}",
            forecast_frame,
            1,
        )
        print("")

    except Exception as e:
        print(e)
        print("")
示例#13
0
def _ets_draw_grid():
    """Draw the major/minor grid shared by every exponential smoothing chart."""
    # The on/off flag is passed positionally: Matplotlib 3.5 renamed the
    # ``b`` keyword to ``visible``, and the positional form works with both.
    plt.grid(True, which="major", color="#666666", linestyle="-")
    plt.minorticks_on()
    plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)


def exponential_smoothing(other_args: List[str], s_ticker: str,
                          df_stock: pd.DataFrame):
    """
    Perform exponential smoothing forecasting

    Parses the ``ets`` command options, fits a model on the "Adj Close"
    column through ``get_exponential_smoothing_model``, prints the fitted
    parameters and AIC/BIC/SSE, and plots the forecast. When ``-e/--end`` is
    given the history is cut at that date and the forecast is compared
    against the real prices that followed (backtesting).

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        Loaded ticker, only used in plot titles
    df_stock: pd.DataFrame
        Loaded stock dataframe; must contain an "Adj Close" column

    Returns
    -------
    None
        Any exception raised after argument setup is printed, not propagated

    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="ets",
        description="""
            Exponential Smoothing, see https://otexts.com/fpp2/taxonomy.html

            Trend='N',  Seasonal='N': Simple Exponential Smoothing
            Trend='N',  Seasonal='A': Exponential Smoothing
            Trend='N',  Seasonal='M': Exponential Smoothing
            Trend='A',  Seasonal='N': Holt’s linear method
            Trend='A',  Seasonal='A': Additive Holt-Winters’ method
            Trend='A',  Seasonal='M': Multiplicative Holt-Winters’ method
            Trend='Ad', Seasonal='N': Additive damped trend method
            Trend='Ad', Seasonal='A': Exponential Smoothing
            Trend='Ad', Seasonal='M': Holt-Winters’ damped method
            Trend component: N: None, A: Additive, Ad: Additive Damped
            Seasonality component: N: None, A: Additive, M: Multiplicative
        """,
    )

    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-t",
        "--trend",
        action="store",
        dest="trend",
        type=check_valid_trend,
        default="N",
        help="Trend component: N: None, A: Additive, Ad: Additive Damped.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store",
        dest="seasonal",
        type=check_valid_seasonal,
        default="N",
        help="Seasonality component: N: None, A: Additive, M: Multiplicative.",
    )
    parser.add_argument(
        "-p",
        "--periods",
        action="store",
        dest="seasonal_periods",
        type=check_positive,
        default=5,
        help="Seasonal periods.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        # BACKTESTING: validate the end date, then split history / future
        if ns_parser.s_end_date:

            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            if (ns_parser.s_end_date < get_next_stock_market_days(
                    last_stock_day=df_stock.index[0],
                    n_next_days=5 + ns_parser.n_days)[-1]):
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=ns_parser.n_days)

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        # Get ETS model
        model, title = get_exponential_smoothing_model(
            df_stock["Adj Close"].values,
            ns_parser.trend,
            ns_parser.seasonal,
            ns_parser.seasonal_periods,
        )

        if not model.mle_retvals.success:
            print("ConvergenceWarning: Optimization failed to converge.")
            return

        raw_forecast = model.forecast(ns_parser.n_days)

        # NaN has to be detected BEFORE flooring: ``nan > 0`` is False, so the
        # flooring below would turn every NaN into a 0 price and hide it.
        if np.isnan(raw_forecast).any():
            print(
                "RuntimeWarning: invalid value encountered in double_scalars."
            )
            return

        # Non-positive forecast values are replaced by 0
        forecast = [i if i > 0 else 0 for i in raw_forecast]

        l_pred_days = get_next_stock_market_days(
            last_stock_day=df_stock["Adj Close"].index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(forecast, index=l_pred_days, name="Price")

        print(f"\n{title}")
        print("\nFit model parameters:")
        for key, value in model.params.items():
            print(f"{key} {' '*(18-len(key))}: {value}")

        print("\nAssess fit model:")
        print(f"AIC: {round(model.aic, 2)}")
        print(f"BIC: {round(model.bic, 2)}")
        print(f"SSE: {round(model.sse, 2)}\n")

        # Plotting: history followed by the forecast
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, df_stock["Adj Close"], lw=2)
        # BACKTESTING
        if ns_parser.s_end_date:
            plt.title(f"BACKTESTING: {title} on {s_ticker}")
        else:
            plt.title(f"{title} on {s_ticker}")

        plt.xlim(
            df_stock.index[0],
            get_next_stock_market_days(df_pred.index[-1], 1)[-1],
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        _ets_draw_grid()
        # Dashed connector from the last known price to the first prediction
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [df_stock["Adj Close"].values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(
            df_stock.index[-1],
            df_pred.index[-1],
            facecolor="tab:orange",
            alpha=0.2,
        )
        _, _, ymin, ymax = plt.axis()
        plt.vlines(
            df_stock.index[-1],
            ymin,
            ymax,
            linewidth=1,
            linestyle="--",
            color="k",
        )

        # BACKTESTING: overlay the prices that really followed
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index,
                df_future["Adj Close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    df_stock["Adj Close"].values[-1],
                    df_future["Adj Close"].values[0],
                ],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING: second figure, real vs predicted and the % error
        if ns_parser.s_end_date:
            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            plt.plot(
                df_future.index,
                df_future["Adj Close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(
                df_future.index,
                df_future["Adj Close"],
                c="tab:blue",
                lw=3,
            )
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    df_stock["Adj Close"].values[-1],
                    df_future["Adj Close"].values[0],
                ],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [df_stock.index[-1], df_pred.index[0]],
                [df_stock["Adj Close"].values[-1], df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(
                df_stock.index[-1],
                df_pred.index[-1] + datetime.timedelta(days=1),
            )
            plt.ylabel("Share Price ($)")
            _ets_draw_grid()
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            # Relative error of the prediction, computed once for both the
            # line and the scatter drawn below
            real_prices = df_future["Adj Close"].values
            pct_error = 100 * (df_pred.values - real_prices) / real_prices
            plt.plot(df_future.index, pct_error, lw=2, c="red")
            plt.scatter(df_future.index, pct_error, c="red", lw=5)
            plt.title(
                "BACKTESTING: Error between Real data and Prediction [%]"
            )
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [0, pct_error[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(
                df_stock.index[-1],
                df_pred.index[-1] + datetime.timedelta(days=1),
            )
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            _ets_draw_grid()
            # NOTE(review): labels are assigned in drawing order, so on this
            # axis they land on the zero line and the error line - confirm
            # whether this legend is wanted here.
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = df_future["Adj Close"]

            if gtff.USE_COLOR:

                patch_pandas_text_adjustment()

                print("Time         Real [$]  x  Prediction [$]")
                print(
                    df_pred.apply(price_prediction_backtesting_color,
                                  axis=1).to_string())
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values,
                                  df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred,
                                    df_stock["Adj Close"].values[-1])
        print("")

    except Exception as e:
        print(e)
        print("")
示例#14
0
def _regression_draw_grid():
    """Draw the major/minor grid shared by every regression chart."""
    # The on/off flag is passed positionally: Matplotlib 3.5 renamed the
    # ``b`` keyword to ``visible``, and the positional form works with both.
    plt.grid(True, which="major", color="#666666", linestyle="-")
    plt.minorticks_on()
    plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)


def regression(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame,
               polynomial: int):
    """
    Train a regression model

    Parses the ``regression`` command options, builds training windows from
    the "Adj Close" column with ``splitTrain.split_train``, fits either a
    plain linear regression or a polynomial-features + ridge pipeline, and
    prints/plots the forecast. When ``-e/--end`` is given the history is cut
    at that date and the forecast is compared against the real prices that
    followed (backtesting).

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        Stock ticker, only used in plot titles
    df_stock: pd.DataFrame
        Dataframe of stock prices; must contain an "Adj Close" column
    polynomial: int
        Order of polynomial. ``LINEAR`` selects the plain linear model;
        ``USER_INPUT`` makes ``-p/--polynomial`` a required option and takes
        the order from it

    Returns
    -------
    None
        Any exception raised after argument setup is printed, not propagated

    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="regression",
        description="""
            Regression attempts to model the relationship between
            two variables by fitting a linear/quadratic/cubic/other equation to
            observed data. One variable is considered to be an explanatory variable,
            and the other is considered to be a dependent variable.
        """,
    )

    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    if polynomial == USER_INPUT:
        parser.add_argument(
            "-p",
            "--polynomial",
            action="store",
            dest="n_polynomial",
            type=check_positive,
            required=True,
            help="polynomial associated with regression.",
        )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        # BACKTESTING: validate the end date, then split history / future
        if ns_parser.s_end_date:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            if ns_parser.s_end_date < get_next_stock_market_days(
                    last_stock_day=df_stock.index[0],
                    n_next_days=ns_parser.n_inputs + ns_parser.n_days,
            )[-1]:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=ns_parser.n_days)

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            df_stock["Adj Close"].values,
            ns_parser.n_inputs,
            ns_parser.n_days,
            ns_parser.n_jumps,
        )

        if not stock_x:
            print("Given the model parameters more training data is needed.\n")
            return

        # Machine Learning model
        if polynomial == LINEAR:
            model = linear_model.LinearRegression(n_jobs=-1)
        else:
            if polynomial == USER_INPUT:
                polynomial = ns_parser.n_polynomial
            model = pipeline.make_pipeline(
                preprocessing.PolynomialFeatures(polynomial),
                linear_model.Ridge())

        model.fit(stock_x, stock_y)
        # Predict from the last n_inputs closes; non-positive values become 0
        l_predictions = [
            i if i > 0 else 0 for i in model.predict(
                df_stock["Adj Close"].values[-ns_parser.n_inputs:].reshape(
                    1, -1))[0]
        ]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=df_stock["Adj Close"].index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        # Plotting: history followed by the forecast
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, df_stock["Adj Close"], lw=2)
        # BACKTESTING
        if ns_parser.s_end_date:
            plt.title(
                f"BACKTESTING: Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        plt.xlim(df_stock.index[0],
                 get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        _regression_draw_grid()
        # Dashed connector from the last known price to the first prediction
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [df_stock["Adj Close"].values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(df_stock.index[-1],
                    df_pred.index[-1],
                    facecolor="tab:orange",
                    alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(df_stock.index[-1],
                   ymin,
                   ymax,
                   linewidth=1,
                   linestyle="--",
                   color="k")

        # BACKTESTING: overlay the prices that really followed
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index,
                df_future["Adj Close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    df_stock["Adj Close"].values[-1],
                    df_future["Adj Close"].values[0],
                ],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING: second figure, real vs predicted and the % error
        if ns_parser.s_end_date:
            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            plt.plot(
                df_future.index,
                df_future["Adj Close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index,
                        df_future["Adj Close"],
                        c="tab:blue",
                        lw=3)
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    df_stock["Adj Close"].values[-1],
                    df_future["Adj Close"].values[0],
                ],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [df_stock.index[-1], df_pred.index[0]],
                [df_stock["Adj Close"].values[-1], df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(df_stock.index[-1],
                     df_pred.index[-1] + datetime.timedelta(days=1))
            plt.xticks(
                [
                    df_stock.index[-1],
                    df_pred.index[-1] + datetime.timedelta(days=1)
                ],
                visible=True,
            )
            plt.ylabel("Share Price ($)")
            _regression_draw_grid()
            plt.legend(["Real data", "Prediction data"])
            # The upper panel ends up without x ticks; it shares the time
            # range set for the lower panel
            plt.xticks([])

            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            # Relative error of the prediction, computed once for both the
            # line and the scatter drawn below
            real_prices = df_future["Adj Close"].values
            pct_error = 100 * (df_pred.values - real_prices) / real_prices
            plt.plot(df_future.index, pct_error, lw=2, c="red")
            plt.scatter(df_future.index, pct_error, c="red", lw=5)
            plt.title(
                "BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [0, pct_error[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(df_stock.index[-1],
                     df_pred.index[-1] + datetime.timedelta(days=1))
            plt.xticks(
                [
                    df_stock.index[-1],
                    df_pred.index[-1] + datetime.timedelta(days=1)
                ],
                visible=True,
            )
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            _regression_draw_grid()
            # NOTE(review): labels are assigned in drawing order, so on this
            # axis they land on the zero line and the error line - confirm
            # whether this legend is wanted here.
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = df_future["Adj Close"]

            if gtff.USE_COLOR:

                patch_pandas_text_adjustment()

                print("Time         Real [$]  x  Prediction [$]")
                print(
                    df_pred.apply(price_prediction_backtesting_color,
                                  axis=1).to_string())
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values,
                                  df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, df_stock["Adj Close"].values[-1])
        print("")

    except SystemExit:
        print("")
    except Exception as e:
        print(e)
        print("")
示例#15
0
def display_rnn(
    dataset: str,
    data: Union[pd.Series, pd.DataFrame],
    n_input_days: int,
    n_predict_days: int,
    learning_rate: float,
    epochs: int,
    batch_size: int,
    test_size: float,
    n_loops: int,
    no_shuffle: bool,
):
    """Train the RNN model and print/plot its predictions

    All modelling arguments are forwarded unchanged to
    ``neural_networks_model.rnn_model``; this function only reports the
    result.

    Parameters
    ----------
    dataset : str
        Name of the dataset, used in the plot title
    data : Union[pd.Series, pd.DataFrame]
        Data handed to the model; its last value is passed to the
        prediction printout
    n_input_days : int
        Number of input days forwarded to the model
    n_predict_days : int
        Number of days to predict, forwarded to the model
    learning_rate : float
        Learning rate forwarded to the model
    epochs : int
        Number of training epochs
    batch_size : int
        Training batch size
    test_size : float
        Size of test set
    n_loops : int
        Number of loops to perform for the model. With more than one loop
        the row-wise median of the forecasts is what gets printed
    no_shuffle : bool
        Shuffle flag forwarded to the model
    """

    model_output = neural_networks_model.rnn_model(
        data,
        n_input_days,
        n_predict_days,
        learning_rate,
        epochs,
        batch_size,
        test_size,
        n_loops,
        no_shuffle,
    )
    forecast_frame, loop_preds, y_valid, y_dates_valid, scaler = model_output

    if n_loops > 1:
        # Several runs: add a "Median" column and report that one
        forecast_frame["Median"] = forecast_frame.median(axis=1)
        reported = forecast_frame["Median"]
    else:
        reported = forecast_frame[0]
    print_pretty_prediction(reported, data.values[-1])

    # Validation predictions are reduced to their median along axis 0
    median_preds = np.median(loop_preds, axis=0)
    plot_title = f"RNN Model on {dataset}"
    plot_data_predictions(
        data,
        median_preds,
        y_valid,
        y_dates_valid,
        scaler,
        plot_title,
        forecast_frame,
        n_loops,
    )
    print("")