def _train_and_padded_test(symbol, day_range):
    """Return ``(train, test)`` for *symbol*, with the test set prefixed by the
    last ``day_range - 1`` training rows."""
    # Local import: keeps this block runnable without a module-level alias.
    import pandas as pd

    train = get_stock_data(symbol, "2003-02-10", "2004-09-12")
    test = get_stock_data(symbol, "2004-09-13", "2005-01-22")

    overlap = day_range - 1
    # ``train[-0:]`` would select the WHOLE frame, so a zero overlap has to be
    # handled explicitly instead of relying on negative slicing.
    carried_over = train[-overlap:] if overlap > 0 else train[:0]

    # DataFrame.append was removed in pandas 2.0; concat gives the same result.
    return train, pd.concat([carried_over, test])


def get_data(day_range):
    """Fetch AAPL and IBM price history split into train and test sets.

    Training data covers 2003-02-10 to 2004-09-12 and test data 2004-09-13 to
    2005-01-22. Each test set starts with the last ``day_range - 1`` rows of
    the corresponding training set, followed by the test-period rows.

    Args:
        day_range: Window length; ``day_range - 1`` trailing training rows are
            carried over into the test set.

    Returns:
        Tuple ``(full_ibm_train, full_ibm_test, full_apple_train,
        full_apple_test)``.
    """
    full_apple_train, full_apple_test = _train_and_padded_test("AAPL", day_range)
    full_ibm_train, full_ibm_test = _train_and_padded_test("IBM", day_range)
    return full_ibm_train, full_ibm_test, full_apple_train, full_apple_test
def test_get_stock_data():
    """get_stock_data returns a DataFrame for each explicitly named source."""
    symbol_and_source = (
        (PHISIX_SYMBOL, "phisix"),
        (YAHOO_SYMBOL, "yahoo"),
    )
    for symbol, source in symbol_and_source:
        frame = get_stock_data(symbol, DATE_START, DATE_END, source=source)
        assert isinstance(frame, pd.DataFrame)
def test_get_stock_data():
    """get_stock_data returns a DataFrame with and without an explicit source."""
    # Each symbol requested from its own source.
    explicit_sources = (
        (PHISIX_SYMBOL, "phisix"),
        (YAHOO_SYMBOL, "yahoo"),
    )
    for symbol, source in explicit_sources:
        frame = get_stock_data(symbol, DATE_START, DATE_END, source=source)
        assert isinstance(frame, pd.DataFrame)

    # No source given: a non-PSE symbol should still come back (from yahoo)
    # when the default phisix lookup fails.
    fallback_frame = get_stock_data(YAHOO_SYMBOL, DATE_START, DATE_END)
    assert isinstance(fallback_frame, pd.DataFrame)
def get_ticker_data(self):
    """Download TSLA history and pick out the most recent ``self.period`` dates.

    Data is requested from 2015-01-01 up to yesterday.

    Returns:
        ``(result, arr_data, all_dates)`` where ``result`` holds the last
        ``self.period`` index labels in chronological order (every label when
        fewer rows are available), ``arr_data`` is the object returned by
        ``get_stock_data`` and ``all_dates`` lists every index label.
        ``None`` when ``self.period`` is falsy.

    Raises:
        Exception: whatever ``get_stock_data`` raised; it is reported on
            stdout and then re-raised.
    """
    if not self.period:
        return None

    yesterday = date.today() - timedelta(days=1)
    try:
        arr_data = get_stock_data("TSLA", "2015-01-01", yesterday)
    except Exception as e:
        print('get stock data error, query misformed line 20')
        print(e)
        # Without data nothing below can work; continuing would only fail
        # later on an unbound ``arr_data``, hiding the real cause.
        raise
    print(arr_data)

    all_dates = list(arr_data.index)

    # Walk backwards from the newest row until ``self.period`` labels are
    # collected (or the data runs out), then restore chronological order.
    newest_first = []
    for label in reversed(all_dates):
        newest_first.append(label)
        if len(newest_first) == self.period:
            break
    result = newest_first[::-1]

    return result, arr_data, all_dates
def predict(self):
    """Predict closing prices for ``self.ticker`` from average sentiment.

    For every row of price data the model turns one average-sentiment
    observation into a fractional change, which is applied to that row's
    opening price.

    TODO: observations are still taken from the training data. Once more data
    is available, training and testing should use different date ranges and the
    observations should come from a test set; several previous days of
    sentiment could then also feed one prediction.

    Returns:
        ``(preds, actual)``: the flattened array of predicted closes and the
        array of true closing prices.
    """
    # fastquant is not inclusive of the end date, hence the extra day.
    price_rows = get_stock_data(self.ticker, self.start_date,
                                self.end_date + timedelta(days=1))

    # Skip the first day: there is no previous-day sentiment for it.
    price_rows = price_rows[1:]

    actual = price_rows['close'].values
    opens = price_rows['open'].values

    # Average sentiment recorded for this ticker (taken from the training set).
    ticker_rows = self.train_data[self.train_data['ticker'] == self.ticker]
    obs = ticker_rows['avgSent'].values

    # The predicted fractional change cannot be fed back in as an observation;
    # each step needs the real average sentiment of a previous day.
    preds = []
    for i, open_price in enumerate(opens):
        pred_frac_change = self.model.predict(obs[i].reshape(-1, 1))
        preds.append(pred_frac_change[0] * open_price + open_price)

    return np.array(preds).flatten(), actual
def gen_data(self, ticker):
    """Build the fractional-change frame for *ticker* with a sentiment label.

    Args:
        ticker: Symbol whose prices and sentiment are combined.

    Returns:
        The output of ``self.data_prep`` without its first row, extended with
        a ``'SentPred'`` column ('UPTREND' / 'DOWNTREND', one per entry of
        ``self.dates``) and a ``'ticker'`` column.
    """
    # Tweets for this ticker only (selected here, not used further below).
    ticker_data = self.tweets[self.tweets['ticker'] == ticker]

    # Prices over the configured range; one day is added to the end date.
    price_end = self.end_date + timedelta(days=1)
    prices = get_stock_data(ticker, self.start_date, price_end)

    # Previous-day sentiment predicts the next day, so the first fractional
    # change has nothing to be paired with and is dropped.
    frac_change = self.data_prep(prices)[1:]

    # Positive average sentiment means an expected uptrend, anything else a
    # downtrend.
    trend_labels = [
        'UPTREND' if self.calc_avg_sentiment(ticker, day) > 0 else 'DOWNTREND'
        for day in self.dates
    ]

    frac_change['SentPred'] = trend_labels
    frac_change['ticker'] = ticker
    return frac_change
def rolling_window_test(self):
    """Run a rolling-window evaluation for every entry of ``self.tests``.

    For each test the price history described by its ``'window'`` dict is
    downloaded and split ten times: 1155 rows to train on followed by 10 rows
    to test on, with the split shifted by the test size each time. A fresh
    model is trained per split. The mean absolute percentage error averaged
    over the splits is stored in ``self.results`` under
    ``'<model name>:<ticker>'``, and all results are written as JSON to
    ``self.f``.
    """
    training_size = 1155
    testing_size = 10
    n_windows = 10

    for test in self.tests.values():
        # running error total and number of splits evaluated
        error = 0
        test_n = 0

        window_params = test['window']
        ticker = window_params['ticker']
        window = get_stock_data(ticker, window_params['start'],
                                window_params['end'])

        # slide over by testing_size each time
        for i in range(0, n_windows * testing_size, testing_size):
            split = i + training_size
            train_data = window.iloc[i:split]
            test_data = window.iloc[split:split + testing_size]
            print(f'window {i+1}')

            # fresh model for every split
            self.model = self.Model(params=self.params)
            self.model.train(train_data=train_data)
            preds, actuals = self.model.predict(test_data=test_data)

            error += self.model.mean_abs_percent_error(y_pred=preds,
                                                       y_true=actuals)
            test_n += 1
            print('DONE')

        # the last split is the one that gets plotted
        if self.plot:
            self.model.plot_continuous(
                preds=preds,
                train=train_data,
                actual=actuals,
                title=
                f'{self.model.name} {ticker} forecasted vs actual continuous stock price'
            )

        # average MAPE over the splits
        avg_error = error / test_n
        self.results[f'{self.model.name}:{ticker}'] = avg_error

    # The context manager closes the file even if the write fails.
    with open(self.f, 'w') as output_file:
        output_file.write(json.dumps(self.results))
def test_backtest():
    """
    Runs ``backtest`` once for every registered strategy, using default
    parameter values, and checks that each run returns a result.
    """
    sample = pd.read_csv(SAMPLE_CSV, parse_dates=["dt"])
    # Random values stand in for a custom indicator column.
    sample["custom"] = np.random.random((sample.shape[0], )) * 100

    for strategy in STRATEGY_MAPPING:
        errmsg = "Backtest encountered error for strategy '{}'!".format(
            strategy)

        if strategy != "sentiment":
            cerebro = backtest(strategy, sample, plot=False)
            assert cerebro is not None, errmsg
            continue

        # News sentiment: cached pickle instead of scraping
        # (get_bt_news_sentiment(keyword="tesla", page_nums=2)) during tests.
        data = get_yahoo_data("TSLA", "2020-01-01", "2020-07-04",
                              dividends=True)
        with open(SENTI_PKL, "rb") as handle:
            sentiments = pickle.load(handle)
        cerebro = backtest(strategy, data, sentiments=sentiments, senti=0.4,
                           plot=False)
        assert cerebro is not None, errmsg

        # Disclosure sentiment: also read from a cached pickle rather than
        # calling get_disclosure_sentiment.
        data_disclosures = get_stock_data(
            "TSLA",
            "2020-01-01",
            "2020-09-30",
            dividends=True,
        )
        with open(DISCLOSURE_PKL, "rb") as handle_disclosures:
            sentiments_disclosures = pickle.load(handle_disclosures)
        cerebro_disclosures = backtest(
            strategy,
            data_disclosures,
            sentiments=sentiments_disclosures,
            senti=0.2,
            plot=False,
        )
        assert cerebro_disclosures is not None, errmsg
def get_data(self):
    """Return one frame with a column per symbol in ``self.stock_list``.

    Each symbol is fetched for ``self.start_date`` to ``self.end_date``, its
    column is renamed to the symbol, and the frames are joined side by side.
    The index of the result is named ``'DATE'``.
    """

    def _column_for(symbol):
        # The fetched frame must have exactly one column for this rename.
        frame = get_stock_data(symbol, self.start_date, self.end_date)
        frame.columns = [symbol]
        return frame

    combined = pd.concat(
        [_column_for(symbol) for symbol in self.stock_list], axis=1)
    combined.index.name = 'DATE'
    return combined
def get_ticker_data(self):
    """Download price history for ``self.name`` from 2017-01-01 to yesterday.

    Returns:
        ``(np_data, pd_data)``: the values array of the fetched data and the
        fetched object itself. ``None`` when ``self.name`` is falsy.

    Raises:
        Exception: whatever the download raised; it is reported on stdout and
            then re-raised.
    """
    if not self.name:
        return None

    yesterday = date.today() - timedelta(days=1)
    try:
        pd_data = get_stock_data(self.name, "2017-01-01", yesterday)
        np_data = pd_data.values
    except Exception as e:
        print('get stock data error, query misformed line 20')
        print(e)
        # Falling through to the return would fail on unbound names and
        # mask the real error, so propagate it instead.
        raise

    return np_data, pd_data
def __init__(self, ticker, period, fast_period, slow_period):
    """Load price data for *ticker* and compute its indicators.

    Args:
        ticker: Symbol to download.
        period: Number of days of history, counted back from now.
        fast_period: Period passed to ``SMA`` for the ``'SMA_fast'`` column.
        slow_period: Period passed to ``SMA`` for the ``'SMA_slow'`` column.
    """
    now = datetime.datetime.now()
    window_start = now - datetime.timedelta(days=period)

    self.tkr = ticker
    self.period = period
    self.fast_period = fast_period
    self.slow_period = slow_period
    self.start_date = window_start
    self.end_date = now

    # The data frame has to exist before the indicator methods run.
    self.df = get_stock_data(self.tkr, self.start_date, self.end_date)
    self.MACD()
    self.RSI()

    slow_average = self.SMA(period=self.slow_period)
    self.df['SMA_slow'] = slow_average
    fast_average = self.SMA(period=self.fast_period)
    self.df['SMA_fast'] = fast_average
def get_stock_data(self, format="ohlc"):
    """overwrites get_stock_data

    The instance's start and end dates are converted from CALENDAR_FORMAT to
    YYYY-MM-DD, which the stock data call requires. The fetched data is kept
    on ``self.stock_data`` and also returned.
    """
    start_date, end_date = (
        format_date(raw_date, informat=CALENDAR_FORMAT, outformat="%Y-%m-%d")
        for raw_date in (self.start_date, self.end_date)
    )

    if self.verbose:
        print("Pulling {} stock data...".format(self.symbol))

    self.stock_data = get_stock_data(
        self.symbol,
        start_date=start_date,
        end_date=end_date,
        format=format,
    )
    return self.stock_data
def predict(self):
    """Score the sentiment-based trend labels against the actual price moves.

    Returns:
        Fraction of days on which the ``'SentPred'`` label stored for
        ``self.ticker`` matches the actual trend of that day.
    """
    # one day is added to the end date of the request
    test_data = get_stock_data(self.ticker, self.start_date,
                               self.end_date + timedelta(days=1))

    # ignore the first day: there is no sentiment from the day before it
    test_data = test_data[1:]

    # A raw closing price is always positive, so ``close > 0`` would label
    # every single day 'UPTREND'. The trend is the sign of the day's move.
    # NOTE(review): uses the open-to-close move and assumes an 'open' column
    # is present; confirm this matches the definition used by data_prep.
    day_moves = test_data['close'].values - test_data['open'].values
    actual = ['UPTREND' if move > 0 else 'DOWNTREND' for move in day_moves]

    # observations should come from a test set once enough data exists
    ticker_rows = self.train_data[self.train_data['ticker'] == self.ticker]
    obs = ticker_rows['SentPred'].values

    # count the days on which the label agrees with what happened
    correct = sum(1 for i, trend in enumerate(actual) if obs[i] == trend)
    return correct / len(actual)
def get_stock_data(self, format="dohlc"):
    """overwrites get_stock_data

    The instance's start and end dates are converted from CALENDAR_FORMAT to
    YYYY-MM-DD, which the stock data call requires. The ``dt`` column of the
    fetched data is parsed to datetimes and becomes the index; the result is
    kept on ``self.stock_data`` and also returned.
    """
    start_date, end_date = (
        format_date(raw_date, informat=CALENDAR_FORMAT, outformat="%Y-%m-%d")
        for raw_date in (self.start_date, self.end_date)
    )

    if self.verbose:
        print("Pulling {} stock data...".format(self.symbol))

    fetched = get_stock_data(
        self.symbol,
        start_date=start_date,
        end_date=end_date,
        format=format,
    )

    # parse the dates, then index the frame by them
    fetched["dt"] = pd.to_datetime(fetched.dt)
    self.stock_data = fetched.set_index("dt")
    return self.stock_data
def query_data():
    """Fetch 'baba' stock data from 2020-01-28 to 2021-01-28."""
    symbol = 'baba'
    first_day, last_day = "2020-01-28", "2021-01-28"
    return get_stock_data(symbol, first_day, last_day)
from fastquant import get_stock_data

# Price history for the "PTT.BK" symbol over a fixed date range.
df = get_stock_data(
    "PTT.BK",
    start_date="2015-01-01",
    end_date="2020-08-30",
)
from fastquant import backtest, get_stock_data

# Price history the strategy is evaluated on.
df = get_stock_data(
    "AAAA",
    start_date="2018-01-01",
    end_date="2021-04-01",
)

# MACD backtest; return_plot=True makes backtest hand back the plot as well.
res, plot = backtest(
    'macd',
    df,
    init_cash=2000,
    fast_period=12,
    slow_period=26,
    signal_period=9,
    sma_period=30,
    dir_period=10,
    return_plot=True,
)
# To keep the figure: plot.savefig('AAAA_macd.png')
from fastquant import get_stock_data, backtest
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from datetime import date, timedelta
from tapy import Indicators

arr = []

df = get_stock_data('tsla', start_date="2017-01-01", end_date="2020-01-01")
print(df.columns)

# Rename the columns to the capitalised names used from here on.
df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']

# Add the awesome oscillator, a simple moving average and fractal columns.
indicators = Indicators(df)
indicators.awesome_oscillator(column_name='AO')
indicators.sma(column_name='sma')
indicators.fractals(
    column_name_high='fractal_highs',
    column_name_low='fractal_lows',
)

df = indicators.df
print(df.tail())

# Draw every indicator column on the same axes.
for column in ('sma', 'AO', 'fractal_highs', 'fractal_lows'):
    plt.plot(df[column])
plt.show()
from fastquant import get_stock_data, backtest
from datetime import date

today = date.today()

# History for "WFG.TO" from 2015-09-01 up to today.
df = get_stock_data("WFG.TO", start_date="2015-09-01", end_date=today)

# Single SMA-crossover run with fixed periods.
backtest(
    'smac',
    df,
    fast_period=25,
    slow_period=52,
)

# Grid-search variant, kept for reference:
# res = backtest("smac", df, fast_period=range(10, 30, 3), slow_period=range(40, 55, 3), verbose=False)
# print(res[['fast_period', 'slow_period', 'final_value']].head())
def get_data(self, ticker, start_date, end_date):
    """Return the result of ``get_stock_data`` for *ticker* and the date range.

    Args:
        ticker: Symbol to fetch.
        start_date: Start of the requested range.
        end_date: End of the requested range.
    """
    stock_data = get_stock_data(ticker, start_date, end_date)
    return stock_data
from fastquant import get_crypto_data, get_stock_data, backtest
from fbprophet import Prophet
from matplotlib import pyplot as plt
from fbprophet.diagnostics import cross_validation
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd

# max # of records is 500
# Crypto alternative:
# outter_df = get_crypto_data("BTC/USDT", "2019-01-01", "2020-05-31")
outter_df = get_stock_data("JFC", "2018-01-01", "2019-05-31")

# Rows per window (456 is the value noted for the crypto alternative).
oneyroneqtr = 315
length = len(outter_df[0:oneyroneqtr])
custompre = pd.DataFrame()


def MAPE(Y_actual, Y_Predicted):
    """Return the mean absolute percentage error, expressed in percent."""
    relative_error = (Y_actual - Y_Predicted) / Y_actual
    return np.mean(np.abs(relative_error)) * 100


# Slide over the data one row at a time; only full-length windows are used.
for x in range(len(outter_df)):
    frames = outter_df.index[x:x + oneyroneqtr]
    length = len(frames)
    if length == oneyroneqtr:
        df = outter_df.loc[frames]
from fastquant import get_stock_data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, GRU
import tensorflow as tf
import config as cf  # parameters from config.py

# 'high' column of IBM as a single-column 2-D array: 2006-2016 for training,
# 2017 for testing.
train_data = get_stock_data(
    "IBM", "2006-01-01", "2016-12-31")['high'].values.reshape(-1, 1)
test_data = get_stock_data(
    "IBM", "2017-01-01", "2017-12-31")['high'].values.reshape(-1, 1)

# The scaler is fitted on the training values only and re-used for the test set.
sc = MinMaxScaler(feature_range=(0, 1))
train_scale = sc.fit_transform(train_data)
test_scale = sc.transform(test_data)

# Each sample is the 60 preceding scaled values; its target is the next value.
X_train = []
Y_train = []
for i in range(60, len(train_scale)):
    history = train_scale[i - 60:i, 0]
    target = train_scale[i, 0]
    X_train.append(history)
    Y_train.append(target)

x_train = np.array(X_train)
y_train = np.array(Y_train)
)[["cleaned_tweet"]].sample(n=1).iat[0, 0]) else: st.markdown( f"No tweet for {random_stock} between {start_date} and {end_date}" ) # Divider st.markdown(f"<hr>", unsafe_allow_html=True) # NOTE: Loading Stock Price data from fastquant st.markdown( f"<h2 style='color: black;'> Stock Price Action for <span style='font-weight: bold;'> {random_stock} </span></h2>", unsafe_allow_html=True) stock_fq = random_stock.lstrip('$') stock_df = get_stock_data(stock_fq, start_date - datetime.timedelta(days=7), date.today()) # stock_df = stock_df.join fig1 = px.line(stock_df, x="dt", y="close") st.plotly_chart(fig1) # NOTE: DataFrame of Raw Data st.markdown( f"<h2 style='color: black;'> <span style='font-weight: bold;'> {random_stock} </span> Tweets based on date range </h2>", unsafe_allow_html=True) modified_data = data.loc[(data['tweet_created_at'].dt.date >= start_date) & (data['tweet_created_at'].dt.date <= end_date) & (data['stock_code'] == random_stock), ['username', 'tweet_created_at', 'cleaned_tweet']] st.markdown( f"{len(modified_data)} tweet/s between {start_date} and {end_date}")
self.model.plot_continuous( preds=preds, train=train_data, actual=actuals, title= f'{self.model.name} {ticker} forecasted vs actual continuous stock price' ) # store average MAPE error avg_error = error / test_n self.results[f'{self.model.name}:{ticker}'] = avg_error # write errors to file dump = json.dumps(self.results) output_file = open(self.f, 'w') output_file.write(dump) output_file.close() if __name__ == "__main__": test = rolling_window_tests['test6']['window'] df = get_stock_data(test['ticker'], test['start'], test['end']) print('df') print(df) for i in range(0, 100, 10): train = df.iloc[i:i + 1155] test = df.iloc[i + 1155:i + 1155 + 10] print(i) print(train) print(test)
from fastquant import backtest, get_stock_data import pickle from gaussian_hmm import * import json #from forecasting_train_predictor import * #from overfit_train_predictor import * # want to train on this data, need access to gaussian_hmm file df = get_stock_data('AAPL', '2020-01-01', '2020-12-31') params = { 'n_components': 2, 'algorithm': 'map', 'n_iter': 100, 'd': 5, 'name': 'GHMM' } def percentage_gain(init, end): return (end - init) / init training = { 'AAPL': { 'start': '2017-08-01', 'end': '2019-01-01' }, 'IBM': { 'start': '2017-08-01', 'end': '2019-01-01'
def test_get_stock_data():
    """get_stock_data returns a DataFrame for both sample symbols."""
    for symbol in (PHISIX_SYMBOL, YAHOO_SYMBOL):
        frame = get_stock_data(symbol, DATE_START, DATE_END)
        assert isinstance(frame, pd.DataFrame)
def rolling_window_test(self, folder, windows=10, train_size=1300, test_size=100):
    """Run a rolling-window evaluation for every entry of ``self.tests``.

    For each test the price history described by its ``'window'`` dict is
    downloaded, then ``windows`` consecutive splits are taken, each shifted by
    ``test_size`` rows: ``train_size`` rows to train on followed by
    ``test_size`` rows to predict. A fresh model and train/predict helper are
    built per split. The error averaged over the splits is stored in
    ``self.results`` under ``'<model name>:<ticker>'`` and all results are
    written as JSON to ``f'{folder}{name}.json'``.

    Args:
        folder: Prefix of the output path; also handed to the plot helpers.
        windows: Number of splits per test.
        train_size: Rows in each training slice.
        test_size: Rows in each test slice, and the shift between splits.

    Raises:
        ValueError: if ``self.params['loss']`` is not one of the handled
            loss names.
    """
    for test in self.tests.values():
        # running error total and number of splits evaluated
        error = 0
        test_n = 0

        window_params = test['window']
        ticker = window_params['ticker']
        window = get_stock_data(ticker, window_params['start'],
                                window_params['end'])
        print('Window shape: ', window.shape)

        for index, i in enumerate(range(0, windows * test_size, test_size)):
            train_end = i + train_size
            test_end = train_end + test_size
            print('Train range: ', i, '-', train_end)
            print('Test range: ', train_end, '-', test_end)
            train_data = window.iloc[i:train_end]
            test_data = window.iloc[train_end:test_end]
            print('Window train data shape: ', train_data.shape)
            print('Window test data shape: ', test_data.shape)

            # Make the model and train_predictor
            self.model_class = self.Model(params=self.params)
            self.model = self.model_class.gen_model()
            self.train_predictor = self.Train_Predictor(params=self.params)

            # train and predict
            self.model, model_history = self.train_predictor.train(
                model=self.model,
                train_data=self.model_class.preprocess_data(train_data),
                label_column_index=self.model_class.label_column_index)
            preds, actuals = self.train_predictor.predict(
                model=self.model,
                test_data=self.model_class.preprocess_data(test_data),
                label_column_index=self.model_class.label_column_index)

            # get error for this window
            loss = self.params['loss']
            if loss == "mean_absolute_percentage_error":  # keras' name for the loss
                error += self.model_class.mean_abs_percent_error(
                    y_pred=preds, y_true=actuals)
            elif loss == "root_mean_squared_error":
                error += self.model_class.root_mean_squared_error(
                    y_pred=preds, y_true=actuals)
            elif loss == "mean_squared_error":
                # NOTE(review): MSE-trained models are scored with MAPE here;
                # the original author flagged this as needing confirmation.
                error += self.model_class.mean_abs_percent_error(
                    y_pred=preds, y_true=actuals)
            else:
                raise ValueError(
                    "Loss parameter isn't programmed or incorrect. \nLoss parameter: "
                    + self.params['loss'])
            test_n += 1
            print('DONE')

            # NOTE(review): plots are produced per window because the titles
            # carry the window number; confirm this is the intended placement.
            if self.plot:
                self.model_class.plot_results(
                    preds=preds,
                    actual=actuals,
                    title=
                    f'{self.model_class.name} {ticker} Window {index+1} forcasted vs actual stock prices {window_params["start"]} to {window_params["end"]}',
                    folder=folder)
                if model_history is not None:
                    self.model_class.plot_loss(
                        t_loss=model_history.history['loss'],
                        v_loss=model_history.history['val_loss'],
                        title=
                        f'{self.model_class.name} {ticker} Window {index+1} train vs validation loss',
                        folder=folder)
                print('DONE PLOTTING')

        # store the error averaged over the splits
        avg_error = error / test_n
        self.results[f'{self.model_class.name}:{ticker}'] = avg_error

    # The context manager closes the file even if the write fails.
    json_file = f'{folder}{self.model_class.name}.json'
    with open(json_file, 'w') as output_file:
        output_file.write(json.dumps(self.results))
from fastquant import backtest, get_stock_data

# One year of "JFC" data for the backtest.
jfc = get_stock_data(
    "JFC",
    start_date="2018-01-01",
    end_date="2019-01-01",
)

# SMA-crossover strategy with a 15-period fast and 40-period slow average.
backtest(
    'smac',
    jfc,
    fast_period=15,
    slow_period=40,
)
from fastquant import get_stock_data, backtest
import matplotlib.pyplot as plt
import numpy as np

tsla = get_stock_data("TSLA", "2018-01-01", "2019-01-01")

# pandas access notes:
#   .iloc[i]                 -> the complete row at position i
#   .index[i]                -> the date (index label) of that row
#   .loc['volume', 'close']  -> ranges of columns
#   .attrs                   -> dict keys
print(tsla.iloc[0].high)

HIGHS = []
LOWS = []
MED_PRICE = []
FIVE_DAY = []
THIRTY_FOUR_DAY = []
AO = []
INDEX = []


def get_high_low():
    """Append one ``[index label, price]`` pair per row to HIGHS and LOWS."""
    for position in range(len(tsla)):
        row = tsla.iloc[position]
        HIGHS.append([row.name, row.high])
        LOWS.append([row.name, row.low])


def get_med_price():
    """Append ``[label, [(high + low) / 2]]`` to MED_PRICE for every entry of
    HIGHS, then print MED_PRICE."""
    for i, high_entry in enumerate(HIGHS):
        midpoint = (high_entry[1] + LOWS[i][1]) / 2
        MED_PRICE.append([high_entry[0], [midpoint]])
    print(MED_PRICE)
def load_data(in_inputs): train_data = get_stock_data(ticker, train_start_date, train_end_date)[in_inputs].values test_data = get_stock_data(ticker, test_start_date, test_end_date)[in_inputs].values train_data = train_data.reshape(-1,1) test_data = test_data.reshape(-1,1)