def add_options(df, ticker):
    """Left-join option-flow features for ``ticker`` onto ``df``.

    The option table is read from the pickle at ``stock_io.options_all_data``
    and restricted to rows whose ``symbol`` equals ``ticker``.  Each of the
    four flow columns is NaN-filled with 0, divided by ``Adj Close`` and
    ``Volume`` and multiplied by 1,000,000, then the table is passed through
    ``ts_to_features.mongodb_format`` and ``fe_pipeline`` before the merge.

    Args:
        df: Frame with a ``date`` column that the features are merged onto.
        ticker: Value matched against the option table's ``symbol`` column.

    Returns:
        The left-merged frame.  NaNs in the columns reported by
        ``util.show_cols(df, 'Flow_')`` are replaced with 0.
    """
    cols_options = ['LTCallFlow', 'STCallFlow', 'LTPutFlow', 'STPutFlow']

    df_options = pd.read_pickle(stock_io.options_all_data)
    # Take an explicit copy of the filtered rows: assigning columns into a
    # boolean-mask slice is chained assignment and triggers
    # SettingWithCopyWarning.
    df_options = df_options[df_options['symbol'] == ticker].copy()

    # Normalise each flow column by price and volume.
    # NOTE(review): rows with a zero Volume or Adj Close would produce inf
    # here, which the later fillna(0) does not remove -- confirm the data
    # never contains them.
    for col in cols_options:
        df_options[col] = (
            df_options[col].fillna(0)
            / df_options['Adj Close']
            / df_options['Volume']
            * 1000
            * 1000
        )

    df_options = df_options.drop(
        ['symbol', 'CallFlow', 'PutFlow', 'Adj Close', 'Volume'], axis=1)
    df_options = ts_to_features.mongodb_format(df_options)

    # Fill again after reformatting, before the feature pipeline runs.
    for col in cols_options:
        df_options[col] = df_options[col].fillna(0)
    df_options = fe_pipeline(df_options, cols_options,
                             scale_ma_flag=False, drop_col=True)

    df = pd.merge(df, df_options, how='left', on='date')

    # Dates without option data come out of the left join as NaN.
    for col in util.show_cols(df, 'Flow_'):
        df[col] = df[col].fillna(0)
    return df
def add_pc_ratio(file_name, col_name):
    """Load a put/call ratio series and turn it into engineered features.

    Args:
        file_name: Path of a pickle holding at least ``date`` and ``PCRatio``.
        col_name: Name the ``PCRatio`` column is renamed to before the
            feature pipeline runs.

    Returns:
        The frame returned by ``fe_pipeline`` for the renamed column.
    """
    ratio_frame = pd.read_pickle(file_name)[['date', 'PCRatio']]
    ratio_frame = ratio_frame.rename(columns={'PCRatio': col_name})
    ratio_frame = ts_to_features.mongodb_format(ratio_frame)
    return fe_pipeline(ratio_frame, [col_name],
                       scale_ma_flag=False, drop_col=True)
def add_other_tickers(df, ticker_list):
    """Left-join engineered features of reference tickers onto ``df``.

    The reference table is read from the pickle at ``stock_io.ref_data``; it
    is expected to hold a ``date`` column plus one column per ticker.

    Args:
        df: Frame with a ``date`` column to extend.
        ticker_list: Column names of the reference table to add, in order.

    Returns:
        ``df`` after one left merge on ``date`` per listed ticker.
    """
    reference = ts_to_features.mongodb_format(
        pd.read_pickle(stock_io.ref_data))

    for symbol in ticker_list:
        symbol_features = fe_pipeline(
            reference[['date', symbol]], [symbol], drop_col=True)
        df = pd.merge(df, symbol_features, how='left', on='date')
    return df
import talib
# from talib import abstract

# UDF
import stock_io
import ts_to_features

# Scratch script: load one ticker's price table and add a TA-Lib indicator.
ticker = 'SPY'

# Truthy -> read the CSV at stock_io.raw_data; falsy -> read the pickle at
# stock_io.pkl_data.
use_yahoo_flag = 0

df = (
    pd.read_csv(stock_io.raw_data.format(ticker))
    if use_yahoo_flag
    else pd.read_pickle(stock_io.pkl_data.format(ticker))
)
df = ts_to_features.mongodb_format(df)

# Simple moving average of Close; no time period is passed, so TA-Lib's
# default applies.
df['SMA'] = talib.SMA(df['Close'])

# Candlestick-pattern experiments, kept for reference.
# df['CDL2CROWS'] = talib.CDLHAMMER(df['Open'], df['High'], df['Low'], df['Close'])
#
# Not working:
# df['CDL3BLACKCROWS'] = abstract.Function('CDL3BLACKCROWS')(df['Open'], df['High'], df['Low'], df['Close'])
#
# Alternative:
# cdl_func = eval('talib.'+'CDL3BLACKCROWS')
# df['CDL3BLACKCROWS'] = cdl_func(df['Open'], df['High'], df['Low'], df['Close'])
#
# print(talib.get_functions())
# print(talib.get_function_groups())
def run_grid_search(ticker, params):
    """Build the feature table for ``ticker`` and run the 1- to 3-day forecasts.

    Thresholds and dates come from the module-level ``conf_man`` mapping
    (``up_down_threshold``, ``total_shifts``, ``train_start_date`` and
    ``test_start_date``).

    Args:
        ticker: Symbol to model; also used to filter the all-stocks table.
        params: Mapping with the keys ``use_pc_flag``, ``use_other_tickers``,
            ``use_cdl_patt``, ``use_short_vol_flag``, ``use_options_flag``
            and ``ticker_list``.  ``ticker_list`` is left unmodified.

    Returns:
        dict: ``{'Ticker': ticker}`` updated with the dict returned by
        ``stock_ml.nth_day_fcst`` for each horizon n = 1, 2, 3.  The final
        dict is also printed.
    """
    # Previously hard-coded as 0.002 (0.2%) and 10.
    up_down_threshold = conf_man['up_down_threshold']
    total_shifts = conf_man['total_shifts']

    use_stocks_all_data = 1
    use_pc_flag = params['use_pc_flag']
    use_other_tickers = params['use_other_tickers']
    use_cdl_patt = params['use_cdl_patt']
    use_short_vol_flag = params['use_short_vol_flag']
    use_options = params['use_options_flag']

    # Work on a private copy: removing the ticker from the caller's list would
    # permanently shrink params['ticker_list'] for every later call that
    # shares the same params.
    ticker_list = list(params['ticker_list'])
    if ticker in ticker_list:
        ticker_list.remove(ticker)

    patt_list = ['CDLBELTHOLD', 'CDLCLOSINGMARUBOZU', 'CDLDOJI',
                 'CDLENGULFING', 'CDLHARAMI', 'CDLHIGHWAVE', 'CDLHIKKAKE',
                 'CDLLONGLEGGEDDOJI', 'CDLMARUBOZU', 'CDLRICKSHAWMAN',
                 'CDLSHORTLINE']
    print_features_flag = 0

    # ---- load -------------------------------------------------------------
    if use_stocks_all_data:
        df = pd.read_pickle(stock_io.stocks_all_data)
        df = df[df['symbol'] == ticker]
        df = ts_to_features.mongodb_format(df)
    else:
        df = pd.read_pickle(stock_io.pkl_data.format(ticker))
        df = ts_to_features.mongodb_format(df)
    # NOTE(review): data_format is applied to both load paths here -- confirm
    # it was not meant for the per-ticker pickle path only.
    df = ts_to_features.data_format(df)

    start_date = conf_man['train_start_date']
    test_date = conf_man['test_start_date']
    df = df[df.date >= start_date]

    # Close (not Adj Close) is the modelled price; short_vol_pct is kept only
    # when the short-volume features are requested.
    unused_cols = ['Adj Close', 'ShortVolume']
    if not use_short_vol_flag:
        unused_cols.append('short_vol_pct')
    df = df.drop(unused_cols, axis=1)
    df = df.sort_values(by=['date'])

    # Snapshot taken before any engineered column is added; the candle
    # pattern step below works on this raw view.
    df_raw_copy = df.copy()

    # ---- feature engineering ------------------------------------------------
    # NOTE(review): bars with High == Low make the denominator zero and yield
    # inf/NaN in both ratios -- confirm downstream handling.
    bar_range = df['High'] - df['Low']
    df['CO_HL'] = (df['Close'] - df['Open']) / bar_range
    df['HC_HL'] = (df['High'] - df['Close']) / bar_range

    # add options
    if use_options:
        df = stock_fe.add_options(df, ticker)

    # add candle patterns
    if use_cdl_patt:
        df_cdl = ta_util.add_cdl(df_raw_copy, patt_list)
    else:
        df_cdl = pd.DataFrame({'empty': []})

    # add MAs
    df = ts_to_features.add_mas(df, ['Close'])
    if use_short_vol_flag:
        df = ts_to_features.add_mas(df, ['Volume', 'short_vol_pct'], [20])
    else:
        df = ts_to_features.add_mas(df, ['Volume'], [20])

    # normalize (keep the unscaled close for the target computation)
    df['Close_raw'] = df['Close']
    df = ts_to_features.add_ratio(
        df, ['Open', 'High', 'Low', 'Close', 'Close_ma10'], 'Close_ma20')
    df = ts_to_features.add_ratio(df, ['Volume'], 'Volume_ma20')
    if use_short_vol_flag:
        df = ts_to_features.add_ratio(
            df, ['short_vol_pct'], 'short_vol_pct_ma20')

    # multi shifts
    shift_cols = ['Open', 'High', 'Low', 'Close', 'Volume', 'Close_ma10',
                  'CO_HL', 'HC_HL']
    if use_short_vol_flag:
        shift_cols.append('short_vol_pct')
    df = ts_to_features.add_multi_shifts(df, shift_cols, total_shifts)

    # add fake-date for forecasting
    df = ts_to_features.clone_last_row(df, shift_cols, days=3)

    # ---- target -------------------------------------------------------------
    # target is +1 / -1 when the close moved at least up_down_threshold
    # against the previous close, 0 otherwise; target_reg is the raw return.
    df = ts_to_features.add_shift_cols(df, ['Close_raw'], 1)
    df['target'] = 0
    df['target'] = np.where(
        df['Close_raw'] >= df['Close_raw_lag1d'] * (1 + up_down_threshold),
        1, df['target'])
    df['target'] = np.where(
        df['Close_raw'] <= df['Close_raw_lag1d'] * (1 - up_down_threshold),
        -1, df['target'])
    df['target_reg'] = df['Close_raw'] / df['Close_raw_lag1d'] - 1
    df = ts_to_features.remove_na(df, 'target_reg')

    # ---- ML -----------------------------------------------------------------
    # Drop the unshifted inputs and helper columns, plus every column that
    # util.show_cols reports for 'lag0d'.
    drop_list = ['Open', 'High', 'Low', 'Close', 'Volume', 'CO_HL', 'HC_HL',
                 'Close_ma10', 'Close_ma20', 'Volume_ma20', 'Close_raw',
                 'Close_raw_lag1d']
    if use_short_vol_flag:
        drop_list.extend(['short_vol_pct', 'short_vol_pct_ma20'])
    drop_list += util.show_cols(df, 'lag0d')
    df = df.drop(drop_list, axis=1)

    if use_pc_flag:
        df = stock_fe.add_pc_ratios(df)
    if use_other_tickers:
        df = stock_fe.add_other_tickers(df, ticker_list)

    print('Ticker: ', ticker)
    if use_short_vol_flag:
        print('Use short volume pct')

    # 1 to 3 day fcst
    output_dict = {'Ticker': ticker}
    for n in range(1, 4):
        output_dict.update(
            stock_ml.nth_day_fcst(df, df_cdl, n, patt_list, test_date,
                                  use_cdl_patt,
                                  print_features_flag=print_features_flag))
    print(output_dict)
    return output_dict
import ts_to_features

# Script-level settings.
start_date = '2018-01-01'
shift_flag = True
shifts = 15

# Raw inputs, read up front from the working directory.
df_spy = pd.read_pickle('spy.pkl')
df_qqq = pd.read_pickle('qqq.pkl')
df_btc = pd.read_pickle('btc.pkl')

# ETF: keep only date plus the close, named after the symbol.
df_spy = ts_to_features.mongodb_format(
    df_spy[['date', 'Close']].rename(columns={'Close': 'SPY'}))
df_qqq = ts_to_features.mongodb_format(
    df_qqq[['date', 'Close']].rename(columns={'Close': 'QQQ'}))

# The BTC table stores its value under 'price' rather than 'Close'.
df_btc = ts_to_features.mongodb_format(
    df_btc[['date', 'price']].rename(columns={'price': 'BTC'}))

# merge: keep only the dates present in all three series
df_merge = (
    df_spy
    .merge(df_qqq, how='inner', on='date')
    .merge(df_btc, how='inner', on='date')
)

# index to date
df = df_merge.copy()
# Script-level settings.
start_date = '2018-01-01'
shift_flag = True
shifts = 15
ticker = 'SLV'

# Raw inputs, read up front from the working directory.
df_spy = pd.read_pickle('spy.pkl')
df_qqq = pd.read_pickle('qqq.pkl')
df_ticker = pd.read_pickle('tickers.pkl')

# ETF: keep only date plus the close, named after the symbol.
df_spy = ts_to_features.mongodb_format(
    df_spy[['date', 'Close']].rename(columns={'Close': 'SPY'}))
df_qqq = ts_to_features.mongodb_format(
    df_qqq[['date', 'Close']].rename(columns={'Close': 'QQQ'}))

# The tickers table already has one column per symbol, so no rename is needed.
df_ticker = ts_to_features.mongodb_format(df_ticker[['date', ticker]])

# merge: keep only the dates present in all three series
df_merge = (
    df_spy
    .merge(df_qqq, how='inner', on='date')
    .merge(df_ticker, how='inner', on='date')
)

# index to date
df = df_merge.copy()