def window(MIN_LAG, MAX_LAG, STEP, STOCK_name, price, next_day):
    '''
    Build the lagged and exponentially smoothed feature frame used to
    forecast one price column of a stock up to ``next_day``.

    :Arguments:
        :MIN_LAG: first smoothing value (start of ``range``)
        :MAX_LAG: exclusive upper bound of the smoothing values
        :STEP: step between consecutive smoothing values
        :STOCK_name: csv file name without the ``.csv`` extension
        :price: name of the price column to lag (e.g. a column of OHLC)
        :next_day: last date of the forecast horizon (end of the
                   business-day range)

    :Return:
        tuple ``(forecast_window, trad_days, dt_range)`` where
        ``forecast_window`` is the feature dataframe, ``trad_days`` the
        number of forecast dates and ``dt_range`` the sorted list of
        forecast dates (business days that are not Mexican holidays).

    :Raises:
        ValueError: when there is no trading day between the last
                    available date and ``next_day``.
    '''
    #get the data we need
    data = loc.read_csv(STOCK_name + '.csv')
    #the stock class
    stock_data = stock(data)
    #get OHLC, HL_PCT, PCT_CHNG
    df_OHLC = pd.concat([stock_data.OHLC(), stock_data.HL_PCT()], axis=1)
    #extract specific time features (adds DayOfTheWeek, used below)
    process_time(df_OHLC)
    #days of the week that actually occur in the data
    Day_of_week = list(df_OHLC.DayOfTheWeek.unique())

    #forecast dates ahead minus holidays
    def filter_Mexico_holidays(df, nextday):
        '''
        :Arguments:
            :df: dataframe whose last index entry is the last known date
            :nextday: end of the forecast horizon

        :Return:
            (forecast dates without holidays, holidays found) using the
            Mexican calendar
        '''
        import holidays
        #holidays in Mexico
        mx_holidays = holidays.Mexico()
        last_seen = pd.to_datetime(df.index[-1])
        candidates = list(pd.bdate_range(last_seen, nextday).date)
        hol_dates = []
        for day in candidates:
            print(day)
            if day in mx_holidays:
                hol_dates.append(day)
        if not hol_dates:
            print('No holidays')
        else:
            for day in hol_dates:
                print('Holiday present on {}'.format(day))
        #keep only dates strictly after the last known date; slicing off
        #the first element would drop a real forecast date whenever the
        #last known date is not itself a business day
        skipped = set(hol_dates)
        last_day = last_seen.date()
        trading = sorted(
            d for d in set(candidates) if d not in skipped and d > last_day)
        return (trading, hol_dates)

    dt_range, hol_dates = filter_Mexico_holidays(df_OHLC, next_day)
    print('*' * 30)
    print('Fininshed extracting holidays')
    trad_days = len(dt_range)
    if trad_days == 0:
        #shift(0) / iloc[-0:] would silently duplicate the whole series
        raise ValueError(
            'No trading days between the last available date and next_day')
    #all series: known dates followed by the forecast dates
    al_dt = list(df_OHLC.index) + list(dt_range)
    #lagged_time series
    forecast_window = pd.DataFrame(al_dt, columns=['timestamp'])
    forecast_window.set_index('timestamp', inplace=True)
    print('Forecast ahead dates created')
    print('Create laggs..')

    def lagg(param, df, t_days):
        '''
        :Arguments:
            :param: feature (column name)
            :df: dataframe
            :t_days: forecast window/days ahead

        :Return:
            the column shifted forward by ``t_days`` rows and the last
            ``t_days`` values of the column, both as lists
        '''
        column = df[param]
        #positional tail; DataFrame.ix no longer exists in pandas >= 1.0
        return list(column.shift(t_days)), list(column.iloc[-t_days:])

    #create laggs for every feature: the shifted series followed by its
    #own tail has exactly one value per entry of al_dt
    params = [price, 'HL_PCT', 'PCT_CHNG']
    for feature in params:
        shifted, tail = lagg(feature, df_OHLC, trad_days)
        forecast_window['lagged_df_{}_t_1'.format(feature)] = shifted + tail
    #the first trad_days rows hold NaN from the shift
    forecast_window = forecast_window.dropna()
    #Exponential laggs for each feature; the loop value is passed as the
    #first positional argument of ewm
    EWM_m = {}
    for col in list(forecast_window.columns):
        for com in range(MIN_LAG, MAX_LAG, STEP):
            EWM_m['{}_{}'.format(col, com)] = forecast_window[col].ewm(com).mean()
    for name, smoothed in EWM_m.items():
        forecast_window[name] = smoothed
    #delta time
    time_dt = pd.DataFrame({'timestamp': forecast_window.index})
    process_time(time_dt).set_index('timestamp', inplace=True)
    #filter weekends from data
    time_dt = time_dt.loc[time_dt.DayOfTheWeek.isin(Day_of_week)]
    #keep feature columns
    time_dt = time_dt.loc[:, list(OHLC_features_)]
    forecast_window = pd.concat([forecast_window, time_dt], axis=1)
    return (forecast_window, trad_days, dt_range)
DATA_LIMIT = 400 #--------RSI_SETTINGS------------------------ LOWER_BOUND = 30 UPPER_BOUND = 70 #--------MACD SETTINGS----------------------- FAST = 12 SLOW = 26 SIGNAL = 9 loc.set_path(path + 'DATASET') #-------get the data we need------------------ STOK_list = ls_STOK() Signal_Gen = {} for ii in range(DATA_LIMIT): print('{}'.format(STOK_list[ii])) data = loc.read_csv('{}'.format(STOK_list[ii]) + str('.csv')) data.index = pd.to_datetime(data.index) #-----convert to the stock class-------------- stock_data = stock(data) Fibo_SUP_RES_ = stock_data.fibonacci_pivot_point() df_RSI = RSI_signal(data, PERIOD, lw_bound=LOWER_BOUND, up_bound=UPPER_BOUND) df_MACD = macd_crossOver(data, FAST, SLOW, SIGNAL) df_BB = bollinger_band_signal(data, PERIOD, deviation=DEVIATION, strategy=STRATEGY) #-----select strategy for saving------------------- if STRATEGY == '2' or STRATEGY == '3':
'wday_sin', #sine of trading day 'wday_cos', #cosine of trading day 'mday_sin', #sine of days of the month 'mday_cos', #cosine of days of the month 'yday_sin', #sine of day of year 'yday_cos', #cosine of day of year 'month_sin', #sine of month 'month_cos' ] #cosine of month #set working directory loc.set_path('D:\\BITBUCKET_PROJECTS\\Forecasting 1.0\\DATASET') #stock list # STOCK_list_ = ls_STOK() STOCK_NAME = 'ALSEA.MX' data = loc.read_csv('{}'.format(STOCK_NAME) + str('.csv')) #window forecast = {} #//Extract Forecast window for pr in price: forecast_window, trad_days, dt_range = window(MIN_LAG, MAX_LAG, STEP, STOCK_NAME, pr, next_day) #train test X_train, X_test, Y_train, Y_test = Scale_train_test( forecast_window, trad_days) #yhat for all models Avg_price = Modeller(X_train, X_test, Y_train, Y_test, dt_range, params, EPOCHS) forecast[pr] = list(Avg_price)
def predict_OHLC(NXT_DAY):
    '''
    Predict Open, High, Low and Close up to ``NXT_DAY`` for every csv file
    found in the current working directory.

    For each stock and each of the four price columns a lagged series and
    its exponentially smoothed versions are combined with the time
    features, standardized, and fed to a RandomForestRegressor.

    :Arguments:
        :NXT_DAY: last date to predict (end of the business-day range that
                  starts at the last date available in the data)

    :Return:
        dataframe with one row per stock and the columns ``Stocks``,
        ``Next_day_Open``, ``Next_day_High``, ``Next_day_Low`` and
        ``Next_day_Close``; each prediction cell holds the array of
        predicted values (one per forecast date).

    :Raises:
        ValueError: when a stock has no business day between its last
                    available date and ``NXT_DAY``.
    '''
    #csv files of the current directory without their extension;
    #str.strip('.csv') removes *characters*, not the suffix, and would
    #mangle names that start or end with '.', 'c', 's' or 'v'
    STOCK_list_ = [os.path.splitext(entry)[0]
                   for entry in os.listdir()
                   if entry.endswith('.csv')]
    MIN_LAG = 5
    MAX_LAG = 25
    STEP = 5
    OHLC_features_ = ['years',  #trading year
                      'days',  #trading days
                      'months',  #months
                      'DayOfTheWeek',  #days of week
                      'time_epoch',  #time epoch
                      'wday_sin',  #sine of trading day
                      'wday_cos',  #cosine of trading day
                      'mday_sin',  #sine of days of the month
                      'mday_cos',  #cosine of days of the month
                      'yday_sin',  #sine of day of year
                      'yday_cos',  #cosine of day of year
                      'month_sin',  #sine of month
                      'month_cos']  #cosine of month
    #one list of predictions per price column, filled stock by stock
    predictions = {'Open': [], 'High': [], 'Low': [], 'Close': []}
    for stock_name in STOCK_list_:
        #get the data we need
        data = loc.read_csv(stock_name + '.csv')
        #the stock class
        stock_data = stock(data)
        #get OHLC
        df_OHLC = stock_data.OHLC()
        #adds the time features (DayOfTheWeek is used below)
        process_time(df_OHLC)
        Day_of_week = list(df_OHLC.DayOfTheWeek.unique())
        #business days strictly after the last known date; dropping the
        #first element blindly would lose a real forecast date whenever
        #the last known date is not a business day
        last_seen = pd.to_datetime(df_OHLC.index[-1])
        horizon = pd.bdate_range(last_seen, NXT_DAY)
        dt_range = horizon[horizon > last_seen.normalize()]
        trad_days = len(dt_range)
        if trad_days == 0:
            #shift(0) / iloc[-0:] would silently duplicate the series
            raise ValueError(
                'No trading days between the last available date and NXT_DAY')
        #all series: known dates followed by the forecast dates
        al_dt = list(df_OHLC.index) + list(dt_range)
        base_window = pd.DataFrame({'timestamp': al_dt})
        base_window.set_index('timestamp', inplace=True)
        #only the price columns are predicted; process_time may have added
        #time-feature columns to df_OHLC
        for _ix in predictions:
            if _ix not in df_OHLC.columns:
                continue
            #start from a fresh frame for every column: reusing the frame
            #of the previous column (already shortened by dropna) makes
            #the assignment below fail with a length mismatch
            forecast_window = base_window.copy()
            column = df_OHLC[_ix]
            #shifted series followed by its own tail: one value per date
            lagged = list(column.shift(trad_days)) + list(column.iloc[-trad_days:])
            forecast_window['lagged_tdays'] = lagged
            forecast_window = forecast_window.dropna()
            #exponentially smoothed versions of the lagged series
            for ij in range(MIN_LAG, MAX_LAG, STEP):
                forecast_window['lagged_t_{}'.format(ij)] = (
                    forecast_window['lagged_tdays'].ewm(ij).mean())
            #delta time
            time_dt = pd.DataFrame({'timestamp': forecast_window.index})
            process_time(time_dt).set_index('timestamp', inplace=True)
            #filter weekends from data
            time_dt = time_dt.loc[time_dt.DayOfTheWeek.isin(Day_of_week)]
            #keep feature columns
            time_dt = time_dt.loc[:, list(OHLC_features_)]
            forecast_window = pd.concat([forecast_window, time_dt], axis=1)
            #standardize
            X_transform = pd.DataFrame(
                StandardScaler().fit_transform(forecast_window),
                columns=list(forecast_window.columns))
            #train test splits; column 0 is the target (lagged_tdays)
            X_train = X_transform.iloc[:-trad_days, 1:]
            Y_train = forecast_window['lagged_tdays'][:-trad_days].values
            X_test = X_transform.iloc[-trad_days:, 1:]
            #model
            Regress = RandomForestRegressor(max_depth=20, random_state=0,
                                            n_estimators=100)
            #fit model
            Regress.fit(X_train, Y_train)
            #predict feature
            predictions[_ix].append(Regress.predict(X_test))
    return pd.DataFrame({'Stocks': STOCK_list_,
                         'Next_day_Open': predictions['Open'],
                         'Next_day_High': predictions['High'],
                         'Next_day_Low': predictions['Low'],
                         'Next_day_Close': predictions['Close']})
# Single-stock indicator script: loads one csv and computes the Fibonacci
# pivot points, Keltner channel, RSI, MACD and Bollinger-band signals.

# band width shared by the Bollinger bands and the Keltner channel
MULTIPLIER = 2
DEVIATION = MULTIPLIER
PERIOD = 20

# RSI thresholds and plot settings
LOWER_BOUND = 30
UPPER_BOUND = 70
MIDLINE = 0
FILLCOLOR = 'skyblue'

# MACD periods
FAST = 12
SLOW = 26
SIGNAL = 9

# point the loader at the dataset folder before reading anything
loc.set_path(path + 'DATASET')

# load the stock and index it by datetime
STOCK_NAME = 'MSFT.MX'
STOCK_list_ = ls_STOK()
data = loc.read_csv('{}.csv'.format(STOCK_NAME))
data.index = pd.to_datetime(data.index)

# wrap the data in the stock class and compute every indicator
stock_data = stock(data)
Fibo_SUP_RES_ = stock_data.fibonacci_pivot_point()
df_ketner = stock_data.Keltner_channel(data, PERIOD, PERIOD, MULTIPLIER)
df_RSI = RSI_signal(
    data,
    PERIOD,
    lw_bound=LOWER_BOUND,
    up_bound=UPPER_BOUND,
)
df_MACD = macd_crossOver(data, FAST, SLOW, SIGNAL)
df_BB = bollinger_band_signal(
    data,
    PERIOD,
    deviation=DEVIATION,
    strategy=STRATEGY,
)