def get_df_prices(sym, start_str, end_str):
    '''Return dataframe with minute-level stock price data from start date to end date (inclusive).

    Pre/post-market candles are requested as well (prepost=True); the
    is_reg_hours column marks the candles between 09:30:00 and 15:59:00.

    Args:
        sym (str): Ticker symbol e.g. 'BYND'
        start_str (str): Start date string e.g. '2020-07-18'
        end_str (str): End date string e.g. '2020-07-18'

    Returns:
        df (pandas.Dataframe): Columns sym, datetime, open, high, low,
            adj_close, volume, is_reg_hours

    Raises:
        AssertionError: If start_str is later than end_str
    '''
    assert start_str <= end_str, 'start_str must not be later than end_str'
    # Download a few days past end_str; the date filter below trims the
    # result back to the requested range.
    end_str_mod = add_days(end_str, 3)
    with suppress_stdout():
        df = yf.download(sym,
                         start=start_str,
                         end=end_str_mod,
                         interval='1m',
                         progress=0,
                         prepost=True).reset_index()
    date_strs = df['Datetime'].dt.date.astype('str')
    is_date_range = (date_strs >= start_str) & (date_strs <= end_str)
    # Take an explicit copy so the column assignments below operate on an
    # independent frame rather than on a slice of the downloaded data
    # (avoids pandas' SettingWithCopyWarning).
    df = df[is_date_range].copy()
    df['Datetime'] = df['Datetime'].dt.tz_localize(None)  #remove timezone
    time_strs = df['Datetime'].dt.time.astype('str')
    is_reg_hours = (time_strs >= '09:30:00') & (time_strs <= '15:59:00')
    df['is_reg_hours'] = np.where(is_reg_hours, 1, 0)
    df['sym'] = sym
    df = df.rename(
        columns={
            'Datetime': 'datetime',
            'Open': 'open',
            'High': 'high',
            'Low': 'low',
            'Adj Close': 'adj_close',
            'Volume': 'volume'
        })
    ls_col = [
        'sym',
        'datetime',
        'open',
        'high',
        'low',
        'adj_close',
        'volume',
        'is_reg_hours',
    ]
    return df[ls_col]
beeps(1) ################### # Update prices_d # ################### if UPDATE_PRICES_D: print(MSG_PRICES_D_1) # get max date present q = ''' SELECT DATE(MAX(date)) FROM prices_d WHERE sym='IBM' ''' max_date_str = pd.read_sql(q, db.conn).iloc[0, 0] # check dates end = add_days(datetime.datetime.today().strftime('%Y-%m-%d'), 3) #today's date plus 3 days df = yf.download('IBM', start=max_date_str, end=end, interval='1d', progress=0).reset_index() df = df[df['Date'].astype('str') > max_date_str] if not df.empty: print(MSG_PRICES_D_2.format(max_date_str, end)) # get ls_sym q = ''' SELECT sym FROM stocks WHERE sec IS NOT NULL ''' ls_sym = pd.read_sql(q, db.conn)['sym'].to_list()
def get_df_i(sym, date_str, live_data, db, num_candles_min=200):
    '''Returns interim dataframe with price data and trading indicators for input symbol and date

    Minute candles for date_str and the previous weekday are loaded, the
    indicators are computed over both days (so rolling windows are already
    warmed up at the start of date_str), and only the rows for date_str are
    returned.

    Args:
        sym (str): Ticker symbol
        date_str (str): Target date; compared against 'YYYY-MM-DD' strings
        live_data (int): Truthy to download candles with yf.download, falsy to
            read regular-hours rows from the prices_m table
        db (Database object): Supplies db.conn for the prices_m query
        num_candles_min (int): Minimum number of candles required on date_str;
            only enforced when live_data is falsy

    Returns:
        df_i (pandas.Dataframe): One row per candle on date_str with the
            columns listed in ls_col below

    Raises:
        Exception: If there is no minute data for the previous weekday or for
            date_str, if date_str has fewer than num_candles_min candles
            (database mode only), or if any output column contains nulls
    '''
    start_str = prev_weekday(date_str)  #start 1 day early to get prev day data for rsi etc
    end_str = add_days(date_str, 3)  #extend end date string due to bug
    if live_data:
        with suppress_stdout():
            df = yf.download(sym,
                             start=start_str,
                             end=end_str,
                             interval='1m',
                             prepost=False,
                             progress=0).reset_index()
        df['Datetime'] = df['Datetime'].dt.tz_localize(None)  #remove timezone
        df = df.rename(
            columns={
                'Adj Close': 'adj_close',
                'Datetime': 'datetime',
                'Open': 'open',
                'High': 'high',
                'Low': 'low',
                'Volume': 'volume'
            })
    else:
        # NOTE(review): the query is assembled with str.format; if sym or
        # date_str can ever come from untrusted input, switch to the
        # connection's parameter binding.
        q = ''' SELECT * FROM prices_m WHERE is_reg_hours = 1 AND sym='{}' AND DATE(datetime)>='{}' AND DATE(datetime)<='{}' ORDER BY datetime '''.format(sym, start_str, date_str)
        df = pd.read_sql(q, db.conn)
        df['datetime'] = pd.to_datetime(df['datetime'])
    df['date_str'] = df['datetime'].dt.date.astype('str')

    if df[df['date_str'] == start_str].empty:
        raise Exception(ERROR_NO_MINUTE_DATA_YTD)
    num_candles_today = df[df['date_str'] == date_str].shape[0]
    if num_candles_today == 0:
        raise Exception(ERROR_NO_MINUTE_DATA_TDY)
    if num_candles_today < num_candles_min and not live_data:
        # The message used to be an empty string, which hid the reason for
        # the failure from whoever caught or logged the exception.
        raise Exception(
            'Less than {} data points for today ({}), skipping!'.format(
                num_candles_min, num_candles_today))

    # Restrict to [start_str, date_str]; copy so the indicator columns are
    # written to an independent frame (avoids SettingWithCopyWarning).
    in_range = (df['date_str'] >= start_str) & (df['date_str'] <= date_str)
    df = df[in_range].copy()

    # moving averages; sma180 falls back to sma90 until 180 candles exist
    df['sma9'] = df['adj_close'].rolling(9).mean()
    df['sma90'] = df['adj_close'].rolling(90).mean()
    df['sma180'] = df['adj_close'].rolling(180).mean()
    df['sma180'] = df['sma180'].fillna(df['sma90'])
    df['sma9_var'] = (df['adj_close'] / df['sma9']) - 1
    df['sma180_var'] = (df['adj_close'] / df['sma180']) - 1
    df = add_rsi(df, 14)

    # absolute open-to-close move per candle, exponentially smoothed
    df['spread'] = ((df['adj_close'] / df['open']) - 1).abs()
    df['spread14_e'] = df['spread'].ewm(span=14).mean()

    # short vs long volume average
    df['volume14'] = df['volume'].rolling(14).mean()
    df['volume34'] = df['volume'].rolling(34).mean()
    df['volume14_34_var'] = (df['volume14'] / df['volume34']) - 1
    df['volume14_34_var'] = df['volume14_34_var'].fillna(0.0)

    # previous-day reference levels
    prev_adj_close = df[df['date_str'] == start_str]['adj_close']
    prev_close = prev_adj_close.to_list()[-1]
    prev_floor = prev_adj_close.min()
    prev_ceil = prev_adj_close.max()
    df['prev_close'] = prev_close
    df['prev_close_var'] = df['adj_close'] / prev_close - 1
    df['prev_floor_var'] = (df['adj_close'] / prev_floor) - 1
    df['prev_ceil_var'] = (df['adj_close'] / prev_ceil) - 1

    # signed open-to-close move of the three preceding candles
    df['candle_score'] = df['adj_close'] / df['open'] - 1
    df['prev1_candle_score'] = df['candle_score'].shift(1)
    df['prev2_candle_score'] = df['candle_score'].shift(2)
    df['prev3_candle_score'] = df['candle_score'].shift(3)

    # Keep only the target day. rsi14, vwap and vwap_var are not created in
    # this function, so they are expected to come from add_rsi / add_vwap.
    df = df[df['date_str'] == date_str].copy()
    df = add_vwap(df)
    df = df.rename(columns={'adj_close': 'close'})
    ls_col = [
        'datetime',
        'close',
        'sma9',
        'sma180',
        'rsi14',
        'vwap',
        'sma9_var',
        'sma180_var',
        'vwap_var',
        'spread14_e',
        'volume14_34_var',
        'prev_close',
        'prev_close_var',
        'prev_floor_var',
        'prev_ceil_var',
        'prev1_candle_score',
        'prev2_candle_score',
        'prev3_candle_score',
    ]
    df = df[ls_col]
    ls_col_na = df.columns[df.isna().any()].tolist()
    if ls_col_na:
        raise Exception(ERROR_NULL_COL.format(ls_col_na))
    return df.reset_index(drop=True)
def get_df_i(sym, date_str, live_data, db, num_candles_min=200):
    '''Returns interim dataframe with price data and trading indicators for input symbol and date

    Minute candles for date_str and the previous weekday are loaded, the
    indicators are computed over both days, and only the rows for date_str
    are returned.

    Args:
        sym (str): Ticker symbol
        date_str (str): Target date; compared against 'YYYY-MM-DD' strings
        live_data (int): Truthy to download candles with yf.download, falsy to
            read regular-hours rows from the prices_m table
        db (Database object): Supplies db.conn for the prices_m query
        num_candles_min (int): Minimum number of candles required on date_str;
            only enforced when live_data is falsy. Defaults to 200, the value
            that was previously hard-coded.

    Returns:
        df_i (pandas.Dataframe): One row per candle on date_str with the
            columns listed in ls_col below

    Raises:
        Exception: If there is no minute data for the previous weekday or for
            date_str, if date_str has fewer than num_candles_min candles
            (database mode only), or if any output column contains nulls
    '''
    start_str = prev_weekday(date_str)  #start 1 day early to get prev day data for rsi etc
    end_str = add_days(date_str, 3)  #extend end date string due to bug
    if live_data:
        with suppress_stdout():
            df = yf.download(sym,
                             start=start_str,
                             end=end_str,
                             interval='1m',
                             prepost=False,
                             progress=0).reset_index()
        df['Datetime'] = df['Datetime'].dt.tz_localize(None)  #remove timezone
        df = df.rename(
            columns={
                'Adj Close': 'adj_close',
                'Datetime': 'datetime',
                'Open': 'open',
                'High': 'high',
                'Low': 'low',
                'Volume': 'volume'
            })
    else:
        # NOTE(review): the query is assembled with str.format; if sym or
        # date_str can ever come from untrusted input, switch to the
        # connection's parameter binding.
        q = ''' SELECT * FROM prices_m WHERE is_reg_hours = 1 AND sym='{}' AND DATE(datetime)>='{}' AND DATE(datetime)<='{}' ORDER BY datetime '''.format(sym, start_str, date_str)
        df = pd.read_sql(q, db.conn)
        df['datetime'] = pd.to_datetime(df['datetime'])
    df['date_str'] = df['datetime'].dt.date.astype('str')

    if df[df['date_str'] == start_str].empty:
        raise Exception('No intraday(minute interval) data for previous day!')
    # count today's candles once and reuse the value for both checks
    num_candles_today = df[df['date_str'] == date_str].shape[0]
    if num_candles_today == 0:
        raise Exception('No intraday(minute interval) data for today!')
    if num_candles_today < num_candles_min and not live_data:
        raise Exception(
            f'Less than {num_candles_min} Data points for today ({num_candles_today}), skipping!'
        )

    # Restrict to [start_str, date_str]; copy so the indicator columns are
    # written to an independent frame (avoids SettingWithCopyWarning).
    in_range = (df['date_str'] >= start_str) & (df['date_str'] <= date_str)
    df = df[in_range].copy()

    # moving averages; sma180 falls back to sma90 until 180 candles exist
    df['sma9'] = df['adj_close'].rolling(9).mean()
    df['sma90'] = df['adj_close'].rolling(90).mean()
    df['sma180'] = df['adj_close'].rolling(180).mean()
    df['sma180'] = df['sma180'].fillna(df['sma90'])
    df['sma9_var'] = (df['adj_close'] / df['sma9']) - 1
    df['sma180_var'] = (df['adj_close'] / df['sma180']) - 1
    df = add_rsi(df, 14)

    # absolute open-to-close move per candle, exponentially smoothed
    df['spread'] = ((df['adj_close'] / df['open']) - 1).abs()
    df['spread14_e'] = df['spread'].ewm(span=14).mean()

    # short vs long volume average
    df['volume14'] = df['volume'].rolling(14).mean()
    df['volume34'] = df['volume'].rolling(34).mean()
    df['volume14_34_var'] = (df['volume14'] / df['volume34']) - 1
    df['volume14_34_var'] = df['volume14_34_var'].fillna(0.0)

    # previous-day reference levels
    prev_adj_close = df[df['date_str'] == start_str]['adj_close']
    prev_close = prev_adj_close.to_list()[-1]
    prev_floor = prev_adj_close.min()
    prev_ceil = prev_adj_close.max()
    df['prev_close'] = prev_close
    df['prev_close_var'] = df['adj_close'] / prev_close - 1
    df['prev_floor_var'] = (df['adj_close'] / prev_floor) - 1
    df['prev_ceil_var'] = (df['adj_close'] / prev_ceil) - 1

    # signed open-to-close move of the three preceding candles
    df['candle_score'] = df['adj_close'] / df['open'] - 1
    df['prev1_candle_score'] = df['candle_score'].shift(1)
    df['prev2_candle_score'] = df['candle_score'].shift(2)
    df['prev3_candle_score'] = df['candle_score'].shift(3)

    # Keep only the target day. rsi14, vwap and vwap_var are not created in
    # this function, so they are expected to come from add_rsi / add_vwap.
    df = df[df['date_str'] == date_str].copy()
    df = add_vwap(df)
    df = df.rename(columns={'adj_close': 'close'})
    ls_col = [
        'datetime',
        'close',
        'sma9',
        'sma180',
        'rsi14',
        'vwap',
        'sma9_var',
        'sma180_var',
        'vwap_var',
        'spread14_e',
        'volume14_34_var',
        'prev_close',
        'prev_close_var',
        'prev_floor_var',
        'prev_ceil_var',
        'prev1_candle_score',
        'prev2_candle_score',
        'prev3_candle_score',
    ]
    df = df[ls_col]
    ls_col_na = df.columns[df.isna().any()].tolist()
    if ls_col_na:
        raise Exception(f'Null found in df_i columns: {ls_col_na}, skipping!')
    return df.reset_index(drop=True)