def main(tickers=['AAPL'], start=None, end=None, n_steps=21):
    """Train a neural-network direction classifier for each ticker.

    For every ticker the price history is fetched with
    ``fc.get_time_series``, labelled with a binary ``outcome`` column
    (1 when ``adj_close`` is above ``adj_open``, 0 otherwise), extended by
    ``fc.get_sma_classifier_features`` and split in order at a random
    point between 60 % and 80 % of the rows. An ``MLPClassifier`` with
    three hidden layers of 100 units is fitted on the first part, scored
    on the second part and passed to ``fc.forecast_classifier``.

    Args:
        tickers: iterable of ticker symbols.
        start: forwarded unchanged to ``fc.get_time_series``.
        end: forwarded unchanged to ``fc.get_time_series``.
        n_steps: forwarded as ``steps`` to ``fc.forecast_classifier``.

    Returns:
        OrderedDict mapping each ticker to whatever
        ``fc.forecast_classifier`` returned for it.
    """

    def session_direction(row):
        # 1 for a positive session (close above open), 0 otherwise
        if row['adj_close'] > row['adj_open']:
            return 1
        return 0

    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        prices = fc.get_time_series(ticker, start, end)
        prices['outcome'] = prices.apply(session_direction, axis=1)
        frame = fc.get_sma_classifier_features(prices).dropna()
        data[ticker] = frame

        # ordered hold-out split at a randomly drawn cut point
        cut = int(len(frame) * rand.uniform(0.60, 0.80))
        train_part = frame[:cut]
        test_part = frame[cut:]

        sma_columns = ['sma_2', 'sma_3', 'sma_4', 'sma_5', 'sma_6']
        train_x = list(train_part[sma_columns].values)
        train_y = list(train_part['outcome'])

        # fit the multi-layer perceptron classifier
        mdl = MLPClassifier(hidden_layer_sizes=(100, 100, 100))
        mdl.fit(train_x, train_y)
        print(mdl)

        # score the model on the held-out rows
        pred = mdl.predict(test_part[sma_columns].values)
        report, matrix = fc.get_classifier_metrics(
            test_part['outcome'].values, pred)
        print("{} Neural Network\n"
              "-------------\n"
              "Classification report: {}\n\n"
              "Confusion matrix: {}\n\n".format(ticker, report, matrix))

        pred_data[ticker] = pd.DataFrame(
            data={'original': test_part['outcome'], 'prediction': pred},
            index=test_part.index)

        # out-of-sample forecast
        forecast_data[ticker] = fc.forecast_classifier(
            model=mdl, sample=test_part, features=sma_columns,
            steps=n_steps)

    return forecast_data
def main(tickers=['AAPL'], start=None, end=None):
    """Plot the distribution and time series of daily log returns.

    For every ticker the price history is fetched with
    ``fc.get_time_series``, a ``log_returns`` column is derived from
    ``adj_close`` and the NaN-free returns are handed to
    ``fc.plot_histogram`` and ``fc.plot_time_series``.

    Args:
        tickers: iterable of ticker symbols.
        start: forwarded unchanged to ``fc.get_time_series``.
        end: forwarded unchanged to ``fc.get_time_series``.

    Returns:
        None. The function is called for its plotting side effects.
    """
    data = OrderedDict()

    for ticker in tickers:
        data[ticker] = fc.get_time_series(ticker, start, end)

        # log returns; the first row has no predecessor and is NaN
        close = data[ticker]['adj_close']
        data[ticker]['log_returns'] = np.log(close / close.shift(1))

        # Calling dropna(inplace=True) on a column lookup does not reliably
        # remove the NaN from what later lookups return, so take an
        # explicit NaN-free copy and plot that.
        log_returns = data[ticker]['log_returns'].dropna()

        fc.plot_histogram(y=log_returns, ticker=ticker)
        fc.plot_time_series(y=log_returns, lags=30, ticker=ticker)
def main(tickers='AAPL', start=None, end=None, n_steps=21):
    """Fit an ARMA model to each ticker's log returns and forecast them.

    Per ticker the function prints descriptive statistics of ``adj_close``,
    prints the stationarity statistics of the log returns and of the ARMA
    residuals, plots both, compares predictions against the held-out tail
    of the series, and stores an ``n_steps`` forecast. A combined price
    chart of all tickers is saved at the end.

    Args:
        tickers: a single ticker symbol or an iterable of symbols.
        start: forwarded unchanged to ``fc.get_time_series``.
        end: forwarded unchanged to ``fc.get_time_series``.
        n_steps: number of steps requested from the fitted model's
            ``forecast`` method.

    Returns:
        OrderedDict mapping each ticker to the value returned by the
        fitted model's ``forecast(steps=n_steps)``.
    """
    # A bare string would otherwise be iterated character by character
    # ('A', 'A', 'P', 'L'), so wrap it in a list.
    if isinstance(tickers, str):
        tickers = [tickers]

    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        data[ticker] = fc.get_time_series(ticker, start, end)

        # log returns; the first row has no predecessor and is NaN
        data[ticker]['log_returns'] = np.log(
            data[ticker]['adj_close'] / data[ticker]['adj_close'].shift(1))
        # Drop that row from the whole frame. dropna(inplace=True) on the
        # column lookup does not shorten the frame, and removing the row
        # here keeps the model input, the train/test split and the
        # prediction positions aligned.
        data[ticker] = data[ticker].dropna(subset=['log_returns'])

        # plotting the histogram of returns
        fc.plot_histogram(data[ticker]['log_returns'])
        fc.plot_time_series(data[ticker]['log_returns'], lags=30)

        print("{} Series\n"
              "-------------\n"
              "mean: {:.3f}\n"
              "median: {:.3f}\n"
              "maximum: {:.3f}\n"
              "minimum: {:.3f}\n"
              "variance: {:.3f}\n"
              "standard deviation: {:.3f}\n"
              "skewness: {:.3f}\n"
              "kurtosis: {:.3f}".format(
                  ticker,
                  data[ticker]['adj_close'].mean(),
                  data[ticker]['adj_close'].median(),
                  data[ticker]['adj_close'].max(),
                  data[ticker]['adj_close'].min(),
                  data[ticker]['adj_close'].var(),
                  data[ticker]['adj_close'].std(),
                  data[ticker]['adj_close'].skew(),
                  data[ticker]['adj_close'].kurtosis()))

        (adfstat, pvalue, critvalues, resstore, dagostino_results,
         shapiro_results, ks_results, anderson_results,
         kpss_results) = fc.get_stationarity_statistics(
             data[ticker]['log_returns'].values)

        print(
            "{} Stationarity Statistics\n"
            "-------------\n"
            "Augmented Dickey-Fuller unit root test: {}\n"
            "MacKinnon’s approximate p-value: {}\n"
            "Critical values for the test statistic at the 1 %, 5 %, and 10 % levels: {}\n"
            "D’Agostino and Pearson’s normality test: {}\n"
            "Shapiro-Wilk normality test: {}\n"
            "Kolmogorov-Smirnov goodness of fit test: {}\n"
            "Anderson-Darling test: {}\n"
            "Kwiatkowski, Phillips, Schmidt, and Shin (KPSS) stationarity test: {}"
            .format(ticker, adfstat, pvalue, critvalues, dagostino_results,
                    shapiro_results, ks_results, anderson_results,
                    kpss_results))

        # Fit an ARMA model to the returns. Index 1 of the result is used
        # in chart titles and index 2 is the fitted results object.
        res_tup = fc.get_best_arma_model(data[ticker]['log_returns'])
        # The summary was previously computed and thrown away; print it.
        print(res_tup[2].summary())

        # verify stationarity of the residuals
        (adfstat, pvalue, critvalues, resstore, dagostino_results,
         shapiro_results, ks_results, anderson_results,
         kpss_results) = fc.get_stationarity_statistics(
             res_tup[2].resid.values)

        print(
            "Stationarity Statistics\n"
            "-------------\n"
            "Augmented Dickey-Fuller unit root test: {}\n"
            "MacKinnon’s approximate p-value: {}\n"
            "Critical values for the test statistic at the 1 %, 5 %, and 10 % levels: {}\n"
            "D’Agostino and Pearson’s normality test: {}\n"
            "Shapiro-Wilk normality test: {}\n"
            "Kolmogorov-Smirnov goodness of fit test: {}\n"
            "Anderson-Darling test: {}\n"
            "Kwiatkowski, Phillips, Schmidt, and Shin (KPSS) stationarity test: {}"
            .format(adfstat, pvalue, critvalues, dagostino_results,
                    shapiro_results, ks_results, anderson_results,
                    kpss_results))

        fc.plot_histogram(y=res_tup[2].resid, ticker=ticker, title='ARMA')
        fc.plot_time_series(y=res_tup[2].resid, lags=30, ticker=ticker,
                            title='ARMA')

        # cross-validation testing: ordered split at a random 60-80 % cut
        split = rand.uniform(0.60, 0.80)
        train_size = int(len(data[ticker]) * split)
        train, test = data[ticker][0:train_size], data[ticker][
            train_size:len(data[ticker])]

        # in-sample prediction over the positions covered by ``test``
        pred_data[ticker] = res_tup[2].predict(
            start=len(train), end=len(train) + len(test) - 1)
        pred_results = pd.DataFrame(
            data=dict(original=test['log_returns'],
                      prediction=pred_data[ticker].values),
            index=test.index)

        print('{} Original Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['original']))
        print('{} Prediction Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['prediction']))

        # prediction plot
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(pred_results['original'])
        ax.plot(pred_results['prediction'])
        ax.set(title='{} ARMA{} In-Sample Return Prediction'.format(
            ticker, res_tup[1]), xlabel='time', ylabel='$')
        ax.legend(['Original', 'Prediction'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-ARMA-In-Sample-Return-Prediction'.format(ticker))

        # out-of-sample forecast
        forecast_data[ticker] = res_tup[2].forecast(steps=n_steps)

        # forecast plot; element 0 of the forecast result is plotted
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(forecast_data[ticker][0])
        # the chart title previously carried a stray ".png" suffix
        ax.set(
            title='{} Day {} ARMA{} Out-of-Sample Return Forecast'.format(
                n_steps, ticker, res_tup[1]), xlabel='time', ylabel='$')
        ax.legend(['Forecast'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Day-{}-ARMA-Out-of-Sample-Return-Forecast.png'.format(
                n_steps, ticker))

    # end of day plot of all tickers
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['adj_close'])
    ax.set(title='Time series plot', xlabel='time', ylabel='$')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks.png')

    return forecast_data
def main(tickers=['AAPL'], start=None, end=None, n_steps=21):
    """Fit a neural-network price regressor per ticker and chart the results.

    For every ticker the price history is fetched with
    ``fc.get_time_series``, extended by ``fc.get_sma_regression_features``
    and split in order at a random point between 60 % and 80 % of the rows.
    An ``MLPRegressor`` with three hidden layers of 100 units is fitted on
    ``sma_15``/``sma_50`` against ``adj_close``, scored on the held-out rows
    and passed to ``fc.forecast_regression``. One prediction chart and one
    forecast chart are saved per ticker, plus a combined price chart.

    Args:
        tickers: iterable of ticker symbols.
        start: forwarded unchanged to ``fc.get_time_series``.
        end: forwarded unchanged to ``fc.get_time_series``.
        n_steps: forwarded as ``steps`` to ``fc.forecast_regression`` and
            used as the length of the plotted forecast tail.

    Returns:
        OrderedDict mapping each ticker to whatever
        ``fc.forecast_regression`` returned for it.
    """
    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        prices = fc.get_time_series(ticker, start, end)
        frame = fc.get_sma_regression_features(prices).dropna()
        data[ticker] = frame

        # ordered hold-out split at a randomly drawn cut point
        cut = int(len(frame) * rand.uniform(0.60, 0.80))
        train_part = frame[:cut]
        test_part = frame[cut:]

        sma_columns = ['sma_15', 'sma_50']
        train_x = np.array(train_part[sma_columns].values)
        train_y = list(train_part['adj_close'])

        # fit the multi-layer perceptron regressor
        mdl = MLPRegressor(hidden_layer_sizes=(100, 100, 100))
        mdl.fit(train_x, train_y)

        # score the model on the held-out rows
        pred = mdl.predict(test_part[sma_columns].values)
        evs, mae, mse, medae, r2 = fc.get_regression_metrics(
            test_part['adj_close'].values, pred)
        print("{} Neural Network\n"
              "-------------\n"
              "Explained variance score: {:.3f}\n"
              "Mean absolute error: {:.3f}\n"
              "Mean squared error: {:.3f}\n"
              "Median absolute error: {:.3f}\n"
              "Coefficient of determination: {:.3f}".format(
                  ticker, evs, mae, mse, medae, r2))

        pred_data[ticker] = pd.DataFrame(
            data={'original': test_part['adj_close'], 'prediction': pred},
            index=test_part.index)

        # out-of-sample forecast
        forecast_data[ticker] = fc.forecast_regression(
            model=mdl, sample=test_part, features=sma_columns,
            steps=n_steps)

        # held-out actuals against predictions
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(pred_data[ticker]['original'])
        ax.plot(pred_data[ticker]['prediction'])
        in_sample_title = '{} Neural Network In-Sample Prediction'.format(
            ticker)
        ax.set(title=in_sample_title, xlabel='time', ylabel='$')
        ax.legend(['Original $', 'Prediction $'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Neural-Network-In-Sample-Prediction.png'.format(
                ticker))

        # last n_steps values of the forecast
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(forecast_data[ticker]['adj_close'][-n_steps:])
        forecast_title = (
            '{} Day {} Neural Network Out-of-Sample Forecast'.format(
                n_steps, ticker))
        ax.set(title=forecast_title, xlabel='time', ylabel='$')
        ax.legend(['Forecast $'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Day-{}-Neural-Network-Out-of-Sample-Forecast.png'
            .format(n_steps, ticker))

    # combined price chart of all tickers
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['adj_close'])
    ax.set(title='Time series plot', xlabel='time', ylabel='$')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks.png')

    return forecast_data
def main(tickers=['AAPL'], start=None, end=None, n_steps=21):
    """Regress ``adj_close`` on ``sma_15`` with OLS for each ticker.

    For every ticker the price history is fetched with
    ``fc.get_time_series``, extended by ``fc.get_sma_regression_features``
    and split in order at a random point between 60 % and 80 % of the rows.
    ``sm.OLS`` is fitted on the first part (no constant term is added here),
    its summary, parameters and standard errors are printed, the held-out
    rows are scored, and the model is passed to ``fc.forecast_regression``.
    One prediction chart and one forecast chart are saved per ticker, plus
    a combined price chart.

    Args:
        tickers: iterable of ticker symbols.
        start: forwarded unchanged to ``fc.get_time_series``.
        end: forwarded unchanged to ``fc.get_time_series``.
        n_steps: forwarded as ``steps`` to ``fc.forecast_regression`` and
            used as the length of the plotted forecast tail.

    Returns:
        OrderedDict mapping each ticker to whatever
        ``fc.forecast_regression`` returned for it.
    """
    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        prices = fc.get_time_series(ticker, start, end)
        frame = fc.get_sma_regression_features(prices).dropna()
        data[ticker] = frame

        # ordered hold-out split at a randomly drawn cut point
        cut = int(len(frame) * rand.uniform(0.60, 0.80))
        train_part = frame[:cut]
        test_part = frame[cut:]

        regressor = list(train_part['sma_15'].values)
        target = list(train_part['adj_close'].values)

        mdl = sm.OLS(target, regressor).fit()
        print(mdl.summary())
        print(mdl.params)
        print(mdl.bse)

        # score the model on the held-out rows
        pred = mdl.predict(test_part['sma_15'].values)
        evs, mae, mse, medae, r2 = fc.get_regression_metrics(
            test_part['adj_close'].values, pred)
        print("{} Ordinary Least Squares\n"
              "-------------\n"
              "Explained variance score: {:.3f}\n"
              "Mean absolute error: {:.3f}\n"
              "Mean squared error: {:.3f}\n"
              "Median absolute error: {:.3f}\n"
              "Coefficient of determination: {:.3f}".format(
                  ticker, evs, mae, mse, medae, r2))

        pred_data[ticker] = pd.DataFrame(
            data={'original': test_part['adj_close'], 'prediction': pred},
            index=test_part.index)

        # out-of-sample forecast
        forecast_data[ticker] = fc.forecast_regression(
            model=mdl, sample=test_part, features='sma_15', steps=n_steps)

        # held-out actuals (red) against predictions (blue)
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(pred_data[ticker]['original'], color='red')
        ax.plot(pred_data[ticker]['prediction'], color='blue')
        in_sample_title = '{} OLS In-Sample Prediction'.format(ticker)
        ax.set(title=in_sample_title, xlabel='time', ylabel='$')
        ax.legend(['Original $', 'Prediction $'])
        fig.tight_layout()
        fig.savefig('charts/{}-OLS-In-Sample-Prediction'.format(ticker))

        # last n_steps values of the forecast
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(forecast_data[ticker]['adj_close'][-n_steps:])
        forecast_title = '{} Day {} OLS Out-of-Sample Forecast'.format(
            n_steps, ticker)
        ax.set(title=forecast_title, xlabel='time', ylabel='$')
        ax.legend(tickers)
        fig.tight_layout()
        fig.savefig('charts/{}-Day-{}-OLS-Out-of-Sample-Forecast'.format(
            n_steps, ticker))

    # combined price chart of all tickers
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['adj_close'])
    ax.set(title='Time series plot', xlabel='time', ylabel='$')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks.png')

    return forecast_data
def main(tickers=['AAPL'], start=None, end=None, n_steps=21):
    """Train a decision-tree direction classifier for each ticker.

    For every ticker the price history is fetched with
    ``fc.get_time_series``, labelled with a binary ``outcome`` column
    (1 when ``adj_close`` is above ``adj_open``, 0 otherwise), extended by
    ``fc.get_sma_classifier_features`` and split in order 80/20. A
    ``DecisionTreeClassifier`` is fitted on the first part, scored on the
    second part and passed to ``fc.forecast_classifier``.

    Args:
        tickers: iterable of ticker symbols.
        start: forwarded unchanged to ``fc.get_time_series``.
        end: forwarded unchanged to ``fc.get_time_series``.
        n_steps: forwarded as ``steps`` to ``fc.forecast_classifier``.

    Returns:
        OrderedDict mapping each ticker to whatever
        ``fc.forecast_classifier`` returned for it.
    """
    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        data[ticker] = fc.get_time_series(ticker, start, end)

        # add the outcome variable, 1 if the trading session was positive
        # (close>open), 0 otherwise
        data[ticker]['outcome'] = data[ticker].apply(
            lambda x: 1 if x['adj_close'] > x['adj_open'] else 0, axis=1)

        # Discard rows with missing values before fitting, as the sibling
        # classifier pipeline does, so incomplete feature rows never reach
        # the estimator. NOTE(review): presumably the SMA columns are NaN
        # for their warm-up rows - confirm against the helper.
        data[ticker] = fc.get_sma_classifier_features(data[ticker]).dropna()

        # fixed ordered 80/20 split
        train_size = int(len(data[ticker]) * 0.80)
        train, test = data[ticker][0:train_size], data[ticker][
            train_size:len(data[ticker])]

        features = ['sma_2', 'sma_3', 'sma_4', 'sma_5', 'sma_6']
        # values of features
        X = list(train[features].values)
        # target values
        Y = list(train['outcome'])

        mdl = DecisionTreeClassifier().fit(X, Y)
        print(mdl)

        pred = mdl.predict(test[features].values)
        pred_prob = mdl.predict_proba(test[features].values)

        # summarize the fit of the model
        classification_report, confusion_matrix = fc.get_classifier_metrics(
            test['outcome'].values, pred)
        print("{} Decision Tree\n"
              "-------------\n"
              "Classification report: {}\n"
              "Confusion matrix: {}\n"
              "Prediction probability: {}\n".format(ticker,
                                                    classification_report,
                                                    confusion_matrix,
                                                    pred_prob))

        pred_results = pd.DataFrame(data=dict(original=test['outcome'],
                                              prediction=pred),
                                    index=test.index)
        pred_data[ticker] = pred_results

        # out-of-sample test
        forecast_data[ticker] = fc.forecast_classifier(model=mdl,
                                                       sample=test,
                                                       features=features,
                                                       steps=n_steps)

    return forecast_data
def main(tickers=['AAPL'], n_steps=21):
    """
    Fit a stochastic-volatility style model to log returns with NUTS.

    For every ticker the last 500 rows returned by ``fc.get_time_series``
    are used. Descriptive statistics of ``adj_close`` and the stationarity
    statistics of the log returns are printed, a pymc3 model (Student-T
    returns whose scale follows a Gaussian random walk) is sampled with
    NUTS, and projected returns from ``fc.generate_proj_returns`` are
    charted for the held-out range and for ``n_steps`` beyond it. Two
    combined charts (prices and returns of all tickers) are saved last.

    Args:
        tickers: iterable of ticker symbols.
        n_steps: number of extra steps requested for the out-of-sample
            projection and length of the plotted forecast tail.

    Returns:
        OrderedDict mapping each ticker to the first value returned by
        ``fc.generate_proj_returns(1000, trace, len(test) + n_steps)``.
    """
    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()
    for ticker in tickers:
        # keep only the most recent 500 rows
        data[ticker] = fc.get_time_series(ticker)[-500:]

        print("{} Series\n"
              "-------------\n"
              "mean: {:.3f}\n"
              "median: {:.3f}\n"
              "maximum: {:.3f}\n"
              "minimum: {:.3f}\n"
              "variance: {:.3f}\n"
              "standard deviation: {:.3f}\n"
              "skewness: {:.3f}\n"
              "kurtosis: {:.3f}".format(ticker,
                                        data[ticker]['adj_close'].mean(),
                                        data[ticker]['adj_close'].median(),
                                        data[ticker]['adj_close'].max(),
                                        data[ticker]['adj_close'].min(),
                                        data[ticker]['adj_close'].var(),
                                        data[ticker]['adj_close'].std(),
                                        data[ticker]['adj_close'].skew(),
                                        data[ticker]['adj_close'].kurtosis()))

        # log returns; the first row has no predecessor and is NaN
        data[ticker]['log_returns'] = np.log(
            data[ticker]['adj_close'] / data[ticker]['adj_close'].shift(1))
        # NOTE(review): dropna(inplace=True) on a column lookup may not
        # remove the NaN from what later lookups return - confirm on the
        # pandas version in use.
        data[ticker]['log_returns'].dropna(inplace=True)

        adfstat, pvalue, critvalues, resstore, dagostino_results, shapiro_results, ks_results, anderson_results, kpss_results = fc.get_stationarity_statistics(
            data[ticker]['log_returns'].values)

        print(
            "{} Stationarity Statistics\n"
            "-------------\n"
            "Augmented Dickey-Fuller unit root test: {}\n"
            "MacKinnon’s approximate p-value: {}\n"
            "Critical values for the test statistic at the 1 %, 5 %, and 10 % levels: {}\n"
            "D’Agostino and Pearson’s normality test: {}\n"
            "Shapiro-Wilk normality test: {}\n"
            "Kolmogorov-Smirnov goodness of fit test: {}\n"
            "Anderson-Darling test: {}\n"
            "Kwiatkowski, Phillips, Schmidt, and Shin (KPSS) stationarity test: {}"
            .format(ticker, adfstat, pvalue, critvalues, dagostino_results,
                    shapiro_results, ks_results, anderson_results,
                    kpss_results))

        # Positional index arrays: train covers 0..449, test starts at 451.
        # NOTE(review): position 450 belongs to neither range - confirm
        # whether the gap is intended.
        train, test = np.arange(0, 450), np.arange(
            451, len(data[ticker]['log_returns']))
        n = len(train)

        with pm.Model() as model:
            sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
            mu = pm.Normal('mu', 0, sd=5, testval=.1)

            nu = pm.Exponential('nu', 1. / 10)
            # one latent 'logs' value per training observation
            logs = pm.GaussianRandomWalk('logs', tau=sigma**-2, shape=n)

            # lam uses variance in pymc3, not sd like in scipy
            r = pm.StudentT('r',
                            nu,
                            mu=mu,
                            lam=1 / np.exp(-2 * logs),
                            observed=data[ticker]['log_returns'].values[train])

        with model:
            # MAP estimate of the latent path, used to initialise sampling
            start = pm.find_MAP(vars=[logs], fmin=sp.optimize.fmin_l_bfgs_b)

        with model:
            # short first run of 100 draws; only its last point is kept
            step = pm.NUTS(vars=[logs, mu, nu, sigma],
                           scaling=start,
                           gamma=.25)
            start2 = pm.sample(100, step, start=start)[-1]

            # Start next run at the last sampled position.
            step = pm.NUTS(vars=[logs, mu, nu, sigma],
                           scaling=start2,
                           gamma=.55)
            trace = pm.sample(2000, step, start=start2)

        # projection covering the held-out range; ``vol`` is not used below
        pred_data[ticker], vol = fc.generate_proj_returns(
            1000, trace, len(test))

        # row 1 of the projection array is the series that is compared
        # against the held-out returns
        pred_results = pd.DataFrame(
            data=dict(original=data[ticker]['log_returns'][test],
                      prediction=pred_data[ticker][1, :]),
            index=data[ticker]['log_returns'][test].index)

        print('{} Original Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['original']))
        print('{} Prediction Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['prediction']))

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(data[ticker]['log_returns'].values, color='blue')
        # x positions 451.. match the start of the ``test`` index array
        ax.plot(1 + len(train) + np.arange(0, len(test)),
                pred_data[ticker][1, :],
                color='red')
        ax.set(title='{} NUTS In-Sample Returns Prediction'.format(ticker),
               xlabel='time',
               ylabel='%')
        ax.legend(['Original', 'Prediction'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-NUTS-In-Sample-Returns-Prediction.png'.format(ticker))

        # out-of-sample test
        forecast_data[ticker], vol = fc.generate_proj_returns(
            1000, trace,
            len(test) + n_steps)

        fig = plt.figure()
        ax = fig.add_subplot(111)
        # only the last n_steps values of row 1 are plotted
        ax.plot(forecast_data[ticker][1, :][-n_steps:])
        ax.set(title='{} Day {} NUTS Out-of-Sample Returns Forecast'.format(
            n_steps, ticker),
               xlabel='time',
               ylabel='%')
        ax.legend(['Forecast'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Day-{}-NUTS-Out-of-Sample-Returns-Forecast.png'.format(
                n_steps, ticker))

    # combined closing-price chart of all tickers
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['adj_close'])
    ax.set(title='Time series plot', xlabel='time', ylabel='$')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks-close-price.png')

    # combined log-return chart of all tickers
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['log_returns'])
    ax.set(title='Time series plot', xlabel='time', ylabel='%')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks-close-returns.png')

    return forecast_data
from tensorflow.keras.layers import LSTM from tensorflow.keras.layers import Dense from tensorflow.keras.layers import Bidirectional from tensorflow.keras.layers import Flatten from tensorflow.keras.layers import TimeDistributed from tensorflow.keras.layers.convolutional import Conv1D from tensorflow.keras.layers.convolutional import MaxPooling1D from tensorflow.keras.layers import ConvLSTM2D from simple_models import train_test_split_serie, split_to_sequence, train_lstm_model, predict_lstm from functions import get_time_series, get_residuals serie = get_time_series(stock='aapl', start='2017-01-01', end='2021-01-01', value='Open', index_as_date=False) train, test = train_test_split_serie(serie, train_size=0.8) X_train, y_train = split_to_sequence(train, n_steps=4) X_test, y_test = split_to_sequence(test, n_steps=4) lstm_types = ['vanilla', 'stacked', 'bidirectional'] n_steps = 4 preds, train_preds, resids, mse_report_test, mse_report_train = { 'aapl': y_test }, {}, {}, {}, {} for lstm_type in lstm_types:
case_doubling='Doubling Time for Confirmed Cases', death_doubling='Doubling Time of Deaths') variable_dict = {} for variable, label in label_dict.items(): variable_dict[label] = variable dates = np.array([ datetime.datetime.strptime(date, '%m/%d/%y') for date in data_df.date.unique() ]) date_strings = [date.strftime('%-m/%-d/%y') for date in dates] country_labels = make_country_labels(data=confirmed) old_confirmed, old_deaths, old_recovered, time_series_dates = get_time_series( local=config['LOCAL']) print("TIME SERIES DATE LIST ORIGINAL") print(time_series_date_list) def confinement_by_area(country='Sweden', col='country', df=None): data = pd.Series( [df.loc[df[col] == country][date].mean() for date in confined_dates]) print("DATA BY AREA") print(data) return data # Creating our dataframe with mean,max,std values of our detected number of people def make_data_confinement(city='Stockholm'):
def main(tickers=['AAPL'], start=None, end=None, n_steps=21):
    """Fit a decision-tree price regressor per ticker and chart the results.

    For every ticker the price history is fetched with
    ``fc.get_time_series``, extended by ``fc.get_sma_regression_features``
    and split in order at a random point between 60 % and 80 % of the rows.
    A ``DecisionTreeRegressor`` is fitted on ``sma_15``/``sma_50`` against
    ``adj_close``, scored on the held-out rows and passed, together with a
    copy of those rows, to ``fc.forecast_regression``. One prediction chart
    and one forecast chart are saved per ticker, plus a combined price
    chart.

    Args:
        tickers: iterable of ticker symbols.
        start: forwarded unchanged to ``fc.get_time_series``.
        end: forwarded unchanged to ``fc.get_time_series``.
        n_steps: forwarded as ``steps`` to ``fc.forecast_regression`` and
            used as the length of the plotted forecast tail.

    Returns:
        OrderedDict mapping each ticker to whatever
        ``fc.forecast_regression`` returned for it.
    """
    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        prices = fc.get_time_series(ticker, start, end)
        frame = fc.get_sma_regression_features(prices).dropna()
        data[ticker] = frame

        # ordered hold-out split at a randomly drawn cut point
        cut = int(len(frame) * rand.uniform(0.60, 0.80))
        train_part = frame[:cut]
        test_part = frame[cut:]

        sma_columns = ['sma_15', 'sma_50']
        train_x = np.array(train_part[sma_columns].values)
        train_y = np.array(train_part['adj_close'])

        mdl = DecisionTreeRegressor()
        mdl.fit(train_x, train_y)
        print(mdl)

        # A graphviz export of the fitted tree used to sit here, disabled
        # inside a bare string literal:
        #
        # dot_data = export_graphviz(mdl, out_file=None,
        #                            feature_names=list(train[features]),
        #                            class_names='outcome',
        #                            filled=True, rounded=True,
        #                            special_characters=True)
        # graph = pydot.graph_from_dot_data(dot_data)
        # graph.write_png("charts/decision-tree-regression.png")

        # score the model on the held-out rows
        pred = mdl.predict(test_part[sma_columns].values)
        evs, mae, mse, medae, r2 = fc.get_regression_metrics(
            test_part['adj_close'].values, pred)
        print("{} Decision Trees\n"
              "-------------\n"
              "Explained variance score: {:.3f}\n"
              "Mean absolute error: {:.3f}\n"
              "Mean squared error: {:.3f}\n"
              "Median absolute error: {:.3f}\n"
              "Coefficient of determination: {:.3f}".format(
                  ticker, evs, mae, mse, medae, r2))

        pred_data[ticker] = pd.DataFrame(
            data={'original': test_part['adj_close'], 'prediction': pred},
            index=test_part.index)

        # out-of-sample forecast; the helper receives its own copy
        forecast_data[ticker] = fc.forecast_regression(
            model=mdl, sample=test_part.copy(), features=sma_columns,
            steps=n_steps)

        # held-out actuals against predictions
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(pred_data[ticker]['original'])
        ax.plot(pred_data[ticker]['prediction'])
        in_sample_title = '{} Decision Trees In-Sample Prediction'.format(
            ticker)
        ax.set(title=in_sample_title, xlabel='time', ylabel='$')
        ax.legend(['Original $', 'Prediction $'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Decision-Trees-In-Sample-Prediction.png'.format(
                ticker))

        # last n_steps values of the forecast
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(forecast_data[ticker]['adj_close'][-n_steps:])
        forecast_title = (
            '{} Day {} Decision Trees Out-of-Sample Forecast'.format(
                n_steps, ticker))
        ax.set(title=forecast_title, xlabel='time', ylabel='$')
        ax.legend(['Forecast $'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Day-{}-Decision-Trees-Out-of-Sample-Forecast'.format(
                n_steps, ticker))

    # combined price chart of all tickers
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['adj_close'])
    ax.set(title='Time series plot', xlabel='time', ylabel='$')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks.png')

    return forecast_data
# Date range and universe for the backtest.
start = '2010-1-1'
end = '2017-1-1'
tickers = ['MSFT', 'CDE', 'NAVB', 'HRG', 'HL']

# index to benchmark the algorithm
benchmark = 'GSPC'

# ordered mapping of ticker -> price frame, in the order of ``tickers``
data = OrderedDict()

# Tidy each frame for the backtester: remove the unadjusted price columns
# and the corporate-action columns, then give the adjusted columns the
# plain names and rename ``ticker`` to ``sid``.
for ticker in tickers:
    data[ticker] = fc.get_time_series(
        ticker=ticker, start_date=start, end_date=end)
    data[ticker].drop(
        columns=['open', 'high', 'low', 'close', 'ex-dividend',
                 'split_ratio'],
        inplace=True)
    data[ticker].rename(
        columns={
            'ticker': 'sid',
            'adj_open': 'open',
            'adj_high': 'high',
            'adj_low': 'low',
            'adj_close': 'close',
        },
        inplace=True)
def main(tickers=['AAPL'], start=None, end=None, n_steps=21):
    """Train a hard-voting ensemble direction classifier for each ticker.

    For every ticker the price history is fetched with
    ``fc.get_time_series``, labelled with an ``outcome`` column (1 when
    ``adj_close`` is above ``adj_open``, -1 otherwise), extended by
    ``fc.get_sma_classifier_features`` and split in order at a random
    point between 60 % and 80 % of the rows. A ``VotingClassifier`` over
    ten estimators is fitted on the first part, scored on the second part
    and passed to ``fc.forecast_classifier``.

    Args:
        tickers: iterable of ticker symbols.
        start: forwarded unchanged to ``fc.get_time_series``.
        end: forwarded unchanged to ``fc.get_time_series``.
        n_steps: forwarded as ``steps`` to ``fc.forecast_classifier``.

    Returns:
        OrderedDict mapping each ticker to the ``'outcome'`` entry of the
        ``fc.forecast_classifier`` result.
    """
    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        data[ticker] = fc.get_time_series(ticker, start, end)

        # add the outcome variable, 1 if the trading session was positive
        # (close>open), -1 otherwise
        data[ticker]['outcome'] = data[ticker].apply(
            lambda x: 1 if x['adj_close'] > x['adj_open'] else -1, axis=1)

        # Discard rows with missing values before fitting, as the sibling
        # classifier pipeline does; several ensemble members reject NaN
        # input. NOTE(review): presumably the SMA columns are NaN for
        # their warm-up rows - confirm against the helper.
        data[ticker] = fc.get_sma_classifier_features(data[ticker]).dropna()

        # cross-validation testing: ordered split at a random 60-80 % cut
        split = rand.uniform(0.60, 0.80)
        train_size = int(len(data[ticker]) * split)
        train, test = data[ticker][0:train_size], data[ticker][
            train_size:len(data[ticker])]

        features = ['sma_2', 'sma_3', 'sma_4', 'sma_5', 'sma_6']
        # values of features
        X = list(train[features].values)
        # target values
        Y = list(train['outcome'])

        # ensemble members, instantiated in the same order as before
        estimators = [
            ('bt', AdaBoostClassifier()),
            ('rf', RandomForestClassifier()),
            ('dt', DecisionTreeClassifier()),
            ('knn', KNeighborsClassifier()),
            ('lgt', LogisticRegression()),
            ('sgd', SGDClassifier()),
            ('mlp', MLPClassifier()),
            ('gnb', GaussianNB()),
            ('bnb', BernoulliNB()),
            ('svm', SVC()),
        ]
        mdl = VotingClassifier(estimators=estimators,
                               voting='hard').fit(X, Y)
        print(mdl)

        # value of ``score`` on the held-out rows
        confidence = mdl.score(test[features].values, test['outcome'].values)
        print("{} Voting Classifier\n"
              "-------------\n"
              "Confidence: {}\n".format(ticker, confidence))

        pred = mdl.predict(test[features].values)
        pred_data[ticker] = pred

        # out-of-sample test
        forecast_data[ticker] = fc.forecast_classifier(
            model=mdl, sample=test, features=features,
            steps=n_steps)['outcome']

    return forecast_data
import numpy as np
# pandas is used below as ``pd`` but was missing from this import block.
import pandas as pd
import seaborn as sns
from copulas.bivariate import gumbel
from copulas.multivariate import GaussianMultivariate
from copulas.visualization import compare_2d
from copulas.visualization import hist_1d, side_by_side
from scipy import stats
from sklearn.preprocessing import MinMaxScaler

from functions import get_time_series, arima_model

# Opening prices of AAPL, indexed by date.
aapl = get_time_series(stock="aapl",
                       start="2018-01-01",
                       end="2020-02-01",
                       value='Open',
                       index_as_date=True)

# Two ARIMA fits on the same series. ``arima`` ends up holding the first
# value returned by the second call.
arima, res = arima_model(aapl, (3, 1, 0), model_report=False)
arima, res2 = arima_model(aapl, (2, 1, 0), model_report=False)
# NOTE(review): this overwrites the ``res2`` returned by the second fit
# with the mean-centred price series - confirm that is intended.
res2 = aapl - aapl.mean()

df0 = pd.DataFrame({'res1': res, 'res2': res2})

# Rescale both columns to [0, 1] ...
df = pd.DataFrame(MinMaxScaler().fit_transform(df0.values),
                  columns=df0.columns)
# ... then move exact 0 and 1 just inside the interval (presumably
# because the copula code needs values strictly between 0 and 1 -
# TODO confirm).
df = pd.DataFrame(np.where(df == 0, 0.00000001,
                           np.where(df == 1, 0.99999999, df)),
                  columns=df0.columns)