def main(tickers='AAPL', start=None, end=None, n_steps=21):
    """Fit an ARMA model to each ticker's log returns and chart the results.

    For every ticker the series is loaded with ``fc.get_time_series``,
    descriptive and stationarity statistics are printed, the model returned
    by ``fc.get_best_arma_model`` is used for an in-sample prediction over a
    randomly sized hold-out tail and for an ``n_steps`` out-of-sample
    forecast, and both are saved as charts under ``charts/``. A final chart
    plots the adjusted close of all tickers together.

    Args:
        tickers: A single ticker symbol or an iterable of ticker symbols.
        start: Forwarded unchanged to ``fc.get_time_series``.
        end: Forwarded unchanged to ``fc.get_time_series``.
        n_steps: Number of steps passed to the fitted model's ``forecast``.

    Returns:
        OrderedDict mapping each ticker to whatever the fitted model's
        ``forecast(steps=n_steps)`` returned.
    """
    import os

    # A bare string (including the default) would otherwise be iterated
    # character by character: 'A', 'A', 'P', 'L'.
    if isinstance(tickers, str):
        tickers = [tickers]
    else:
        tickers = list(tickers)

    # savefig does not create missing directories.
    os.makedirs('charts', exist_ok=True)

    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        data[ticker] = fc.get_time_series(ticker, start, end)
        prices = data[ticker]['adj_close']

        # log_returns
        data[ticker]['log_returns'] = np.log(prices / prices.shift(1))

        # Calling dropna(inplace=True) on a selected column is not guaranteed
        # to affect later lookups of that column, so keep an explicit
        # NaN-free series and use it everywhere below.
        returns = data[ticker]['log_returns'].dropna()

        # plotting the histogram of returns
        fc.plot_histogram(returns)

        fc.plot_time_series(returns, lags=30)

        _print_series_summary(ticker, prices)

        _print_stationarity_report(
            "{} Stationarity Statistics".format(ticker), returns.values)

        # Fit ARMA model to the ticker's returns.
        # NOTE(review): res_tup[1] is used as the model order in chart titles
        # and res_tup[2] as the fitted result -- confirm against
        # fc.get_best_arma_model.
        res_tup = fc.get_best_arma_model(returns)
        best_order, best_fit = res_tup[1], res_tup[2]

        # The summary is only useful if it is actually shown.
        print(best_fit.summary())

        # verify stationarity of the residuals
        _print_stationarity_report("Stationarity Statistics",
                                   best_fit.resid.values)

        fc.plot_histogram(y=best_fit.resid, ticker=ticker, title='ARMA')

        fc.plot_time_series(y=best_fit.resid,
                            lags=30,
                            ticker=ticker,
                            title='ARMA')

        # cross-validation testing: hold out a random 20-40 % tail.
        # The split is taken on the NaN-free returns (the series handed to
        # the model) so predicted positions line up with the hold-out rows.
        split = rand.uniform(0.60, 0.80)
        train_size = int(len(returns) * split)
        test = returns.iloc[train_size:]

        # in-sample prediction
        pred_data[ticker] = best_fit.predict(start=train_size,
                                             end=len(returns) - 1)

        pred_results = pd.DataFrame(data=dict(
            original=test, prediction=np.asarray(pred_data[ticker])),
                                    index=test.index)

        print('{} Original Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['original']))
        print('{} Prediction Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['prediction']))

        # prediction plot
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(pred_results['original'])
        ax.plot(pred_results['prediction'])
        ax.set(title='{} ARMA{} In-Sample Return Prediction'.format(
            ticker, best_order),
               xlabel='time',
               ylabel='$')
        ax.legend(['Original', 'Prediction'])
        fig.tight_layout()
        # Explicit extension: without it a ticker containing a dot would be
        # mistaken for the file format.
        fig.savefig(
            'charts/{}-ARMA-In-Sample-Return-Prediction.png'.format(ticker))
        plt.close(fig)

        # out-of-sample forecast
        forecast_data[ticker] = best_fit.forecast(steps=n_steps)

        # forecast plot
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(forecast_data[ticker][0])
        ax.set(
            title='{} Day {} ARMA{} Out-of-Sample Return Forecast'.format(
                n_steps, ticker, best_order),
            xlabel='time',
            ylabel='$')
        ax.legend(['Forecast'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Day-{}-ARMA-Out-of-Sample-Return-Forecast.png'.format(
                n_steps, ticker))
        plt.close(fig)

    # end of day plot of all tickers
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['adj_close'])
    ax.set(title='Time series plot', xlabel='time', ylabel='$')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks.png')
    plt.close(fig)

    return forecast_data


def _print_series_summary(ticker, prices):
    """Print descriptive statistics of ``prices`` under a ticker heading."""
    print("{} Series\n"
          "-------------\n"
          "mean: {:.3f}\n"
          "median: {:.3f}\n"
          "maximum: {:.3f}\n"
          "minimum: {:.3f}\n"
          "variance: {:.3f}\n"
          "standard deviation: {:.3f}\n"
          "skewness: {:.3f}\n"
          "kurtosis: {:.3f}".format(ticker, prices.mean(), prices.median(),
                                    prices.max(), prices.min(), prices.var(),
                                    prices.std(), prices.skew(),
                                    prices.kurtosis()))


def _print_stationarity_report(heading, values):
    """Run ``fc.get_stationarity_statistics`` on ``values`` and print it."""
    (adfstat, pvalue, critvalues, _resstore, dagostino_results,
     shapiro_results, ks_results, anderson_results,
     kpss_results) = fc.get_stationarity_statistics(values)

    print(
        "{}\n"
        "-------------\n"
        "Augmented Dickey-Fuller unit root test: {}\n"
        "MacKinnon’s approximate p-value: {}\n"
        "Critical values for the test statistic at the 1 %, 5 %, and 10 % levels: {}\n"
        "D’Agostino and Pearson’s normality test: {}\n"
        "Shapiro-Wilk normality test: {}\n"
        "Kolmogorov-Smirnov goodness of fit test: {}\n"
        "Anderson-Darling test: {}\n"
        "Kwiatkowski, Phillips, Schmidt, and Shin (KPSS) stationarity test: {}"
        .format(heading, adfstat, pvalue, critvalues, dagostino_results,
                shapiro_results, ks_results, anderson_results, kpss_results))
Exemplo n.º 2
0
def main(tickers=('AAPL',), n_steps=21):
    """Main entry point of the app.

    For every ticker the last 500 rows returned by ``fc.get_time_series`` are
    used: descriptive and stationarity statistics are printed, a PyMC model
    (Student-T observations whose scale is driven by a Gaussian random walk
    ``logs``) is sampled with NUTS on the first 450 log returns, and
    ``fc.generate_proj_returns`` is used to project returns over the
    remaining observations and ``n_steps`` beyond them. Charts are saved
    under ``charts/``.

    Args:
        tickers: A single ticker symbol or an iterable of ticker symbols.
        n_steps: Number of steps to project past the end of the data.

    Returns:
        OrderedDict mapping each ticker to the projected-returns array
        returned by ``fc.generate_proj_returns`` for the hold-out span plus
        ``n_steps``.
    """
    import os

    # Accept a bare string and never hold on to a caller's (or the default)
    # container.
    if isinstance(tickers, str):
        tickers = [tickers]
    else:
        tickers = list(tickers)

    # savefig does not create missing directories.
    os.makedirs('charts', exist_ok=True)

    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    # Number of leading observations used for fitting; the rest is hold-out.
    train_size = 450

    for ticker in tickers:
        # Copy the slice so that adding a column below writes to our own
        # frame rather than to a view of the helper's result.
        data[ticker] = fc.get_time_series(ticker)[-500:].copy()
        prices = data[ticker]['adj_close']

        _print_series_summary(ticker, prices)

        data[ticker]['log_returns'] = np.log(prices / prices.shift(1))

        # Calling dropna(inplace=True) on a selected column is not guaranteed
        # to affect later lookups of that column, so keep an explicit
        # NaN-free series and use it everywhere below.
        returns = data[ticker]['log_returns'].dropna()

        _print_stationarity_report(
            "{} Stationarity Statistics".format(ticker), returns.values)

        # Contiguous positional split: the hold-out starts right after the
        # last training observation (previously index 450 was in neither
        # set, shifting every projected step by one).
        train = np.arange(0, train_size)
        test = np.arange(train_size, len(returns))
        n = len(train)

        with pm.Model() as model:
            sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
            mu = pm.Normal('mu', 0, sd=5, testval=.1)

            nu = pm.Exponential('nu', 1. / 10)
            logs = pm.GaussianRandomWalk('logs', tau=sigma**-2, shape=n)

            # StudentT's `lam` is a precision-like scale, not a standard
            # deviation as in scipy.
            # NOTE(review): this treats exp(-2 * logs) as the variance; the
            # sign convention must match fc.generate_proj_returns -- confirm.
            pm.StudentT('r',
                        nu,
                        mu=mu,
                        lam=1 / np.exp(-2 * logs),
                        observed=returns.values[train])

        with model:
            start = pm.find_MAP(vars=[logs], fmin=sp.optimize.fmin_l_bfgs_b)

        with model:
            # Short warm-up run to obtain a better starting point/scaling.
            step = pm.NUTS(vars=[logs, mu, nu, sigma],
                           scaling=start,
                           gamma=.25)
            start2 = pm.sample(100, step, start=start)[-1]

            # Start next run at the last sampled position.
            step = pm.NUTS(vars=[logs, mu, nu, sigma],
                           scaling=start2,
                           gamma=.55)
            trace = pm.sample(2000, step, start=start2)

        pred_data[ticker], vol = fc.generate_proj_returns(
            1000, trace, len(test))

        # `test` holds positions, so select with .iloc rather than relying on
        # label/positional fallback of Series[...].
        test_returns = returns.iloc[test]

        # Row 1 of the projected paths is the one reported and charted.
        pred_results = pd.DataFrame(
            data=dict(original=test_returns,
                      prediction=pred_data[ticker][1, :]),
            index=test_returns.index)

        print('{} Original Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['original']))
        print('{} Prediction Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['prediction']))

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(returns.values, color='blue')
        # `test` already holds the x positions of the hold-out observations.
        ax.plot(test, pred_data[ticker][1, :], color='red')
        ax.set(title='{} NUTS In-Sample Returns Prediction'.format(ticker),
               xlabel='time',
               ylabel='%')
        ax.legend(['Original', 'Prediction'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-NUTS-In-Sample-Returns-Prediction.png'.format(ticker))
        plt.close(fig)

        # out-of-sample test
        forecast_data[ticker], vol = fc.generate_proj_returns(
            1000, trace,
            len(test) + n_steps)

        fig = plt.figure()
        ax = fig.add_subplot(111)
        # Only the last n_steps lie beyond the observed data.
        ax.plot(forecast_data[ticker][1, :][-n_steps:])
        ax.set(title='{} Day {} NUTS Out-of-Sample Returns Forecast'.format(
            n_steps, ticker),
               xlabel='time',
               ylabel='%')
        ax.legend(['Forecast'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Day-{}-NUTS-Out-of-Sample-Returns-Forecast.png'.format(
                n_steps, ticker))
        plt.close(fig)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['adj_close'])
    ax.set(title='Time series plot', xlabel='time', ylabel='$')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks-close-price.png')
    plt.close(fig)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['log_returns'])
    ax.set(title='Time series plot', xlabel='time', ylabel='%')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks-close-returns.png')
    plt.close(fig)

    return forecast_data


def _print_series_summary(ticker, prices):
    """Print descriptive statistics of ``prices`` under a ticker heading."""
    print("{} Series\n"
          "-------------\n"
          "mean: {:.3f}\n"
          "median: {:.3f}\n"
          "maximum: {:.3f}\n"
          "minimum: {:.3f}\n"
          "variance: {:.3f}\n"
          "standard deviation: {:.3f}\n"
          "skewness: {:.3f}\n"
          "kurtosis: {:.3f}".format(ticker, prices.mean(), prices.median(),
                                    prices.max(), prices.min(), prices.var(),
                                    prices.std(), prices.skew(),
                                    prices.kurtosis()))


def _print_stationarity_report(heading, values):
    """Run ``fc.get_stationarity_statistics`` on ``values`` and print it."""
    (adfstat, pvalue, critvalues, _resstore, dagostino_results,
     shapiro_results, ks_results, anderson_results,
     kpss_results) = fc.get_stationarity_statistics(values)

    print(
        "{}\n"
        "-------------\n"
        "Augmented Dickey-Fuller unit root test: {}\n"
        "MacKinnon’s approximate p-value: {}\n"
        "Critical values for the test statistic at the 1 %, 5 %, and 10 % levels: {}\n"
        "D’Agostino and Pearson’s normality test: {}\n"
        "Shapiro-Wilk normality test: {}\n"
        "Kolmogorov-Smirnov goodness of fit test: {}\n"
        "Anderson-Darling test: {}\n"
        "Kwiatkowski, Phillips, Schmidt, and Shin (KPSS) stationarity test: {}"
        .format(heading, adfstat, pvalue, critvalues, dagostino_results,
                shapiro_results, ks_results, anderson_results, kpss_results))