def check_warrants(cls, self):
    """Check if local warrant files exist."""
    wt_path_base = f"{self.base_dir}/tickers/warrants"
    dt = getDate.query('iex_eod')

    wt_path_dict = ({
        'Warrants: cheapest': f"{wt_path_base}/cheapest/_{dt}.parquet",
        'Warrants: newest': f"{wt_path_base}/newest/_{dt}.parquet",
        'Warrants: top perf ytd': f"{wt_path_base}/top_perf/_{dt}.parquet",
        'Warrants: worst perf ytd': f"{wt_path_base}/worst_perf/_{dt}.parquet",
        'Warrants: all': f"{wt_path_base}/all/_{dt}.parquet",
        'Warrants: all historical': f"{wt_path_base}/all_hist/_{dt}.parquet"
    })

    # Record True/False in sys_dict for each warrant file found locally
    for key, fpath in wt_path_dict.items():
        self.sys_dict[key] = os.path.isfile(fpath)
def get_nasdaq_symbol_changes():
    """Get symbol change history from nasdaq."""
    sym_change_url = 'https://api.nasdaq.com/api/quote/list-type-extended/symbolchangehistory'
    nasdaq_headers = ({
        'Host': 'api.nasdaq.com',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:94.0) Gecko/20100101 Firefox/94.0',
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Origin': 'https://www.nasdaq.com',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Referer': 'https://www.nasdaq.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'Sec-GPC': '1',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache'
    })

    get = requests.get(sym_change_url, headers=nasdaq_headers)

    df_sym_change = None
    if get.status_code == 200:
        df_sym_change = (pd.DataFrame(
            get.json()['data']['symbolChangeHistoryTable']['rows']))
    else:
        msg1 = 'get_nasdaq_symbol_changes failed with url'
        msg2 = f"and status code {str(get.status_code)}"
        help_print_arg(f"{msg1} {sym_change_url} {msg2}")

    dt = getDate.query('iex_close')
    path = (Path(baseDir().path, 'ref_data/symbol_ref/symbol_changes',
                 f'_{dt}.parquet'))

    if isinstance(df_sym_change, pd.DataFrame):
        write_to_parquet(df_sym_change, path)
    else:
        raise Exception('get_nasdaq_symbol_changes: no dataframe to write')
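
# Minimal, self-contained sketch of the JSON -> DataFrame step above,
# with a mocked payload standing in for a live nasdaq response. The row
# keys here are illustrative assumptions, not the documented API schema.
import pandas as pd

mock_json = {'data': {'symbolChangeHistoryTable': {'rows': [
    {'oldSymbol': 'FB', 'newSymbol': 'META', 'effectiveDate': '2022-06-09'},
]}}}
df_mock = pd.DataFrame(mock_json['data']['symbolChangeHistoryTable']['rows'])
print(df_mock)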
def combine_all_intraday_data(minute='minute_1'):
    """Combine all intraday data, write to file."""
    dt = getDate.query('iex_eod')
    path = Path(baseDir().path, 'intraday', minute, str(dt.year))
    fpaths = list(path.glob('**/*.parquet'))

    df_list = []
    for fpath in fpaths:
        try:
            df_list.append(pd.read_parquet(fpath))
        except Exception as e:
            msg = f"fpath: {str(fpath)} reason: {str(e)}"
            help_print_arg(msg)

    df_all = pd.concat(df_list)

    fpre = f'combined_all/{minute}/'
    fsuf = f"{fpre}_{dt}.parquet"
    path_to_write = path.parent.parent.joinpath(fsuf)

    write_to_parquet(df_all, path_to_write)
def _get_clean_data(cls, self, sym, period, interval, proxy):
    """Request and clean data from yfinance."""
    data = yf.download(
        tickers=sym,
        period=period,
        interval=interval,
        group_by='ticker',
        auto_adjust=True,
        prepost=False,
        proxy=proxy
    )

    df = data.reset_index()
    df.insert(1, 'symbol', sym)
    (df.rename(columns={'Date': 'date', 'Open': 'fOpen', 'High': 'fHigh',
                        'Low': 'fLow', 'Close': 'fClose',
                        'Volume': 'fVolume'},
               inplace=True))
    df = dataTypes(df, parquet=True).df

    dt = getDate.query('iex_eod')
    # Drop any rows dated after the most recent close
    self.df_yf = df[df['date'].dt.date <= dt]
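
# Usage sketch for the yf.download call above, assuming yfinance is
# installed. 'AAPL', '1mo', and '1d' are illustrative values for the
# sym/period/interval parameters.
import yfinance as yf

data = yf.download(tickers='AAPL', period='1mo', interval='1d',
                   group_by='ticker', auto_adjust=True, prepost=False)
print(data.tail())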
def get_yf_loop_missing_hist(key='less_than_20', cs=False, sym_list=None,
                             verb=False, refresh_missing_dates=True):
    """Get less_than_20 syms and call GetYfMissingDates."""
    if sym_list:
        # Use the symbol list passed by the caller as-is
        if verb:
            help_print_arg('get_yf_loop_missing_hist: sym_list assumed')
    elif key == 'get_ignore_ytd':
        df_all = read_clean_combined_all()
        dt = getDate.query('iex_eod')
        df_year = df_all[df_all['date'].dt.year == dt.year].copy(deep=True)

        vc = df_year.value_counts(subset='symbol', ascending=False)
        # Symbols missing more than one day of data this year
        syms_one_miss = vc[(vc < (vc.max() - 1)) & (vc > 0)].index
        sym_list = syms_one_miss.tolist()
        if verb:
            help_print_arg('get_yf_loop_missing_hist: key==get_ignore_ytd: syms_one_miss')
    elif cs is True:
        if refresh_missing_dates:
            MissingHistDates(cs=True)
        bpath = Path(baseDir().path, "StockEOD/missing_dates/all")
        fpath = get_most_recent_fpath(bpath)
        df_dates = pd.read_parquet(fpath)
        # Get all symbols, reduce to common stock and adr's
        sym_list = df_dates['symbol'].unique().tolist()
        if verb:
            help_print_arg('get_yf_loop_missing_hist: cs=True')
    else:
        if refresh_missing_dates:
            MissingHistDates()
        bpath = Path(baseDir().path, f"StockEOD/missing_dates/{key}")
        fpath = get_most_recent_fpath(bpath)
        df_dates = pd.read_parquet(fpath)
        sym_list = df_dates['symbol'].unique().tolist()
        if verb:
            help_print_arg('get_yf_loop_missing_hist: sym_list from missing_dates/key')

    for sym in tqdm(sym_list):
        try:
            GetYfMissingDates(sym=sym)
        except Exception as e:
            help_print_arg(f"get_yf_loop_missing_hist error: {str(e)}")
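
# Self-contained sketch of the "missing more than one day" filter above,
# on a toy frame: value_counts(subset='symbol') counts rows per symbol,
# and anything below max() - 1 is missing at least two sessions.
import pandas as pd

df_year = pd.DataFrame({
    'symbol': ['A', 'A', 'A', 'B', 'B', 'C'],
    'date': pd.date_range('2021-01-04', periods=6, freq='D'),
})
vc = df_year.value_counts(subset='symbol', ascending=False)
syms_missing = vc[(vc < (vc.max() - 1)) & (vc > 0)].index.tolist()
print(syms_missing)  # ['C'] -- one row vs a max of three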
def get_missing_sec_master_idx(sma_df=False):
    """Get missing sec reference data files."""
    # sma_df is the master index file of all dates
    if not isinstance(sma_df, pd.DataFrame):
        sma_df = serverAPI('sec_master_all').df
        sma_df['date'] = pd.to_datetime(sma_df['date'], unit='ms')

    bus_days = getDate.get_bus_days(this_year=True)
    dt = getDate.query('iex_eod')
    bus_days = bus_days[bus_days['date'].dt.date <= dt].copy()

    dts_missing = (bus_days[~bus_days['date']
                   .isin(sma_df['date'].unique().tolist())].copy())
    dts_missing['dt_format'] = dts_missing['date'].dt.strftime('%Y%m%d')

    for dt in tqdm(dts_missing['dt_format']):
        try:
            smi = secMasterIdx(hist_date=dt)
            sleep(.5)
        except Exception as e:
            msg = f"get_missing_sec_master_idx: {str(e)}"
            help_print_arg(msg)
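
# Self-contained sketch of the missing-business-days check above: start
# from a business-day calendar, drop dates already on file, and format
# the remainder. Dates are toy values.
import pandas as pd

bus_days = pd.DataFrame({'date': pd.bdate_range('2021-11-01', '2021-11-05')})
have = pd.to_datetime(['2021-11-01', '2021-11-03'])
missing = bus_days[~bus_days['date'].isin(have)].copy()
missing['dt_format'] = missing['date'].dt.strftime('%Y%m%d')
print(missing['dt_format'].tolist())  # ['20211102', '20211104', '20211105']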
def _start_for_loop(cls, self, dt, verbose, ntests):
    """Start for loop for syms."""
    if not dt:
        dt = getDate.query('iex_eod')
    if isinstance(dt, str):
        try:
            dt = datetime.strptime(dt, '%Y%m%d').date()
        except ValueError:
            dt = datetime.strptime(dt, '%Y-%m-%dT%H:%M:%S').date()
    elif isinstance(dt, Timestamp):
        dt = dt.date()

    bpath = Path(baseDir().path, 'intraday', 'minute_1', str(dt.year))
    error_dict = {}
    n = 0
    # Check if ntests is an integer (if we're testing)
    if not isinstance(ntests, int):
        ntests = 5

    for sym in tqdm(self.syms):
        try:
            self._get_sym_min_data(self, sym, dt, bpath, verbose)
        except Exception as e:
            error_dict[sym] = ({
                'symbol': sym,
                'type': str(type(e)),
                'reason': str(e)
            })
            if verbose:
                msg = f"{sym} get_sym_min_data. Reason: {str(e)}"
                help_print_arg(msg)
        if ntests:  # If testing, eventually break
            n += 1
            if n > ntests:
                break

    self._error_handling(self, error_dict, bpath)
def _construct_params(cls, self):
    """Construct parameters for request, fpath."""
    if self.get_hist_date:
        # Convert historical date str to datetime.date
        hist_dt = (datetime.datetime.strptime(
            self.get_hist_date, '%Y%m%d').date())
        yr = hist_dt.year
        # Financial quarter the historical date falls in
        f_quart = f"QTR{str((hist_dt.month - 1) // 3 + 1)}"
        dt_fmt = self.get_hist_date
    else:
        dt = getDate.query('sec_master')
        yr = dt.year  # Year
        # Financial quarter that we are currently in
        f_quart = f"QTR{str((dt.month - 1) // 3 + 1)}"
        # Formatted year month day
        dt_fmt = dt.strftime('%Y%m%d')

    # Url suffix using the formatted date
    mast_suf = f"master.{dt_fmt}.idx"
    self.fpath = f"{self.baster}/{yr}/_{dt_fmt}.parquet"
    self.fpath_raw = f"{self.baster}/{yr}/raw/_{dt_fmt}.parquet"
    self.url = f"{self.sec_burl}/{yr}/{f_quart}/{mast_suf}"
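
# Quick check of the financial-quarter formula above:
# (month - 1) // 3 + 1 maps months 1-3 to QTR1, 4-6 to QTR2, and so on.
for month in (1, 3, 4, 9, 12):
    print(month, f"QTR{(month - 1) // 3 + 1}")
# 1 QTR1, 3 QTR1, 4 QTR2, 9 QTR3, 12 QTR4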
def return_yoptions_temp_all():
    """Return dataframe of all yoptions temp (today's data)."""
    df_all = None

    # If local environment
    if 'Algo' in baseDir().path:
        try:
            from api import serverAPI
            df_all = serverAPI('yoptions_temp').df
        except ModuleNotFoundError as me:
            help_print_arg(str(me))
    else:  # Assume production environment
        dt = getDate.query('iex_eod')
        yr = dt.year
        fpath = Path(baseDir().path, 'derivatives/end_of_day/temp', str(yr))
        globs = list(fpath.glob('**/*.parquet'))

        df_list = [pd.read_parquet(path) for path in globs]
        df_all = pd.concat(df_list)

    return df_all
def get_last_30_intradays():
    """Get last 30 intraday trading days."""
    bsdays = getDate.get_bus_days()
    dt_today = getDate.query('iex_eod')
    dt_30 = dt_today - timedelta(days=30)

    days = (bsdays[(bsdays['date'].dt.date >= dt_30)
                   & (bsdays['date'].dt.date <= dt_today)])

    df_m1 = serverAPI('iex_intraday_m1').df
    # Only request days we don't already have minute data for
    days_tget = (days[~days['date'].isin(df_m1['date'].unique())].copy())
    # days_tget['dt_fmt'] = days_tget['date'].dt.strftime('%Y%m%d')

    try:
        from app.tasks import execute_func
        for dt in days_tget['date']:
            kwargs = {'dt': dt}
            execute_func.delay('iex_intraday', **kwargs)
    except ModuleNotFoundError:
        pass
def merge_dfs(cls, self):
    """Merge mmo and symref dataframes."""
    merge_list = ['Symbol', 'Underlying']
    if ('exchange' in self.mmo_df.columns
            and 'exchange' in self.sym_df.columns):
        merge_list.append('exchange')

    try:
        df = (pd.merge(self.mmo_df, self.sym_df,
                       on=merge_list, how='inner'))
        df.reset_index(inplace=True, drop=True)
        df['rptDate'] = getDate.query('cboe')
        # Change data types to reduce file size
        df = dataTypes(df, parquet=True).df
    except TypeError:
        df = pd.DataFrame()

    return df
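
# Minimal sketch of the conditional merge-key logic above, with toy
# frames standing in for self.mmo_df / self.sym_df (column values other
# than the merge keys are illustrative).
import pandas as pd

mmo_df = pd.DataFrame({'Symbol': ['AAPL1'], 'Underlying': ['AAPL'],
                       'exchange': ['CBOE'], 'totalVolume': [100]})
sym_df = pd.DataFrame({'Symbol': ['AAPL1'], 'Underlying': ['AAPL'],
                       'exchange': ['CBOE'], 'osiSymbol': ['AAPL1 ex']})

merge_list = ['Symbol', 'Underlying']
if 'exchange' in mmo_df.columns and 'exchange' in sym_df.columns:
    merge_list.append('exchange')
df = pd.merge(mmo_df, sym_df, on=merge_list, how='inner')
print(df)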
def _clean_process_missing(cls, self, df):
    """Clean and process missing dates df for data requests."""
    # Format dates for iex
    dts = df['date'].drop_duplicates()
    m = pd.Series(dts.dt.strftime('%Y%m%d'))
    m.index = dts
    df['dt'] = df['date'].map(m)

    # Iex exact date url construction
    df['sym_lower'] = df['symbol'].str.lower()
    try:
        df['url'] = (df.apply(
            lambda row:
            f"/stock/{row['sym_lower']}/chart/date/{row['dt']}?chartByDay=true",
            axis=1))
    except KeyError:
        help_print_arg(
            f"_clean_process_missing: lambda KeyError: {str(df.columns)}")

    # Construct .parquet paths for reading/writing to local data file
    dt = getDate.query('iex_previous')
    bpath = Path(baseDir().path, 'StockEOD', str(dt.year))
    df['path_parq'] = (df.apply(lambda row: Path(
        bpath, row['symbol'].lower()[0], f"_{row['symbol']}.parquet"),
        axis=1))

    return df
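
# Self-contained sketch of the date -> 'YYYYMMDD' mapping trick above:
# format each unique date once, then map the strings back over the full
# date column instead of re-formatting every row.
import pandas as pd

df = pd.DataFrame({'symbol': ['AAPL', 'MSFT', 'AAPL'],
                   'date': pd.to_datetime(['2021-11-01', '2021-11-01',
                                           '2021-11-02'])})
dts = df['date'].drop_duplicates()
m = pd.Series(dts.dt.strftime('%Y%m%d'))
m.index = dts
df['dt'] = df['date'].map(m)
print(df['dt'].tolist())  # ['20211101', '20211101', '20211102']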
def get_most_recent_fpath(fpath_dir, f_pre='', f_suf='', dt='',
                          this_year=True, second=False):
    """Get the most recent fpath in a directory."""
    path_to_return = False
    if not dt:  # If no date passed, default to iex_close
        dt = getDate.query('iex_close')

    dt_list = getDate.get_bus_days(this_year=this_year)
    date_list = (dt_list[dt_list['date'].dt.date <= dt]
                 .sort_values(by=['date'], ascending=False))
    # Three candidate filename date formats: _YYYY-MM-DD, _YYYY, _YYYYMMDD
    date_list['fpath'] = (date_list.apply(
        lambda row: f"_{row['date'].date()}", axis=1))
    date_list['fpath_yr'] = (date_list.apply(
        lambda row: f"_{row['date'].year}", axis=1))
    date_list['fpath_fmt'] = (date_list.apply(
        lambda row: f"_{row['date'].date().strftime('%Y%m%d')}", axis=1))

    # Iterate through the dates (most recent first) for each filename
    # format and return the first path that exists
    for col in ['fpath', 'fpath_yr', 'fpath_fmt']:
        for index, row in date_list.iterrows():
            tpath = Path(fpath_dir, f"{f_pre}{row[col]}{f_suf}.parquet")
            if tpath.exists():
                return tpath

    # If nothing matched this year, retry once across all years
    if not path_to_return and not second:
        path_to_return = get_most_recent_fpath(fpath_dir, this_year=False,
                                               second=True)
        if path_to_return:
            help_print_arg("get_most_recent_fpath: first failed. "
                           f"Returning {str(path_to_return)}")
            return path_to_return

    if not path_to_return:
        msg_1 = ("Directory empty or path doesn't follow format "
                 "'_dt.parquet'. Returning first path")
        msg_2 = f": {fpath_dir}"
        help_print_arg(f"{msg_1} {msg_2}")
        paths = list(Path(fpath_dir).glob('*.parquet'))
        if paths:
            path_to_return = paths[-1]

    return path_to_return
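
# Usage sketch for get_most_recent_fpath: resolve the latest combined
# StockEOD file. The directory layout mirrors paths used elsewhere in
# this file; pd/Path/baseDir are assumed imported at module level.
bpath = Path(baseDir().path, 'StockEOD/combined_all')
fpath = get_most_recent_fpath(bpath)
if fpath:
    df_combined = pd.read_parquet(fpath)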
from api import serverAPI

# %% codecell
from missing_data.get_missing_hist_from_yf import get_yf_loop_missing_hist

sym_list = ['GENI']
get_yf_loop_missing_hist(sym_list=sym_list)

# %% codecell
# I'd like 2 dataframes:
# One with the data updated every 10 minutes that I can clean later on
# Another with the raw timestamps that I'll have to sort through later on
from multiuse.path_helpers import get_most_recent_fpath

dt = getDate.query('iex_close')
bpath = Path(baseDir().path, 'derivatives/cboe_intraday/2021')
fpath = get_most_recent_fpath(bpath, f_suf='_eod', dt=dt)

# %% codecell
from workbooks.fib_funcs import read_clean_combined_all
df_all = read_clean_combined_all()
# path_eod won't have the timestamps

# %% codecell
sapi_eod = serverAPI('cboe_intraday_eod')
df_cboe = sapi_eod.df
def get_rows(df_sym, max_row, verb=False, calc_date_range=False):
    """Get rows adjacent to the max row to use for analysis."""
    # Rules - there should only be one fibonacci movement
    # Limit to major runs - one to two candles
    # Start with the highest volume for the time period
    # (coincidentally the highest % change also)
    rows = None
    n_list = list(range(50))

    # Walk one row at a time from the max row and check conditions
    min_idx = max_row.index[0]
    max_idx = max_row.index[0]

    # Moving forward
    for n in n_list:
        try:
            row = df_sym.loc[max_idx + (n + 1)]
            if ((row['fChangeP'] > -0.005)
                    & (row['fHigh'] > max_row['fHigh'].iloc[0])):
                if verb:
                    print(f"Max idx: {str(max_idx + (n + 1))}")
            elif ((abs(row['fChangeP']) < .0035)
                    & (row['fHigh'] > max_row['fLow'].iloc[0])):
                if verb:
                    print(f"Max idx: {str(max_idx + (n + 1))}")
            else:
                if verb:
                    print(f"Max idx {str(row)}")
                max_idx = max_idx + n
                break
        except (KeyError, IndexError) as ke:
            # When the index value isn't in the sym_df
            if verb:
                print(f"Max idx ke error: {str(ke)}")
            max_idx = max_idx + n
            break

    # Moving back
    for n in n_list:
        try:
            row = df_sym.loc[min_idx - (n + 1)]
            row_pre = df_sym.loc[min_idx - (n)]
            # If not, go one row back
            if ((row['fChangeP'] >= -.005)
                    & (row['fClose'] < max_row['fClose'].iloc[0])
                    & (row['fClose'] > row['fOpen'])):
                if verb:
                    print(f"Min idx: {str(min_idx - (n + 1))}: "
                          "pos rowChangeP & fClose < max_row['fClose']")
            elif ((abs(row['fChangeP']) < .0035)
                    & (row['fHigh'] * .995 < max_row['fLow'].iloc[0])
                    & (row['fHigh'] > row_pre['fHigh'])
                    & (row['fLow'] > row_pre['fLow'])):
                if verb:
                    print(f"Min idx: {str(min_idx - (n + 1))}: "
                          "row['fHigh'] * .995 < max_row['fLow']")
            elif (((max_row['fLow'].iloc[0] > row['fLow'])
                    & (max_row['fClose'].iloc[0] > row['fHigh']))
                    & (row['fHigh'] > row_pre['fHigh'])
                    & (row['fLow'] > row_pre['fLow'])):
                if verb:
                    print(f"Min idx: {str(min_idx - (n + 1))}: "
                          "max_row['fLow'].iloc[0] > row['fLow']")
            elif ((max_row['fLow'].iloc[0] > row['fLow'])
                    & (max_row['fHigh'].iloc[0] < row['fOpen'])):
                min_idx = min_idx - (n + 1)
                if verb:
                    print(f"Min idx condition reached: {str(min_idx - (n + 1))}: "
                          "max_row['fHigh'] < row['fOpen']")
                break
            elif ((max_row['fLow'].iloc[0] - row['fHigh']) > .5):
                min_idx = min_idx - (n + 1)
                if verb:
                    print(f"Min idx condition reached: {str(min_idx - (n + 1))}: "
                          "max_row['fLow'] - row['fHigh'] > .5")
                break
            else:
                min_idx = min_idx - n
                if verb:
                    print(f"Min idx condition reached: {str(row)}")
                break
        except (KeyError, IndexError) as ke:
            if verb:
                print(f"Min idx ke error: {str(ke)}")
            min_idx = min_idx - n
            break

    if min_idx == max_idx:
        rows = max_row
        if verb:
            print(f"Min idx == max_idx for symbol {max_row['symbol'].iloc[0]}")
    else:
        rows = df_sym.loc[min_idx:max_idx]

    if verb:
        print(f"Max row symbol == {max_row['symbol'].iloc[0]}")
        print(f"Max row index == {max_row.index}")
        print(f"Min Idx == {min_idx}")
        print(f"Max Idx == {max_idx}")
        print(f"Rows shape {rows.shape[0]}")

    if calc_date_range:
        holidays_fpath = Path(baseDir().path, 'ref_data/holidays.parquet')
        holidays = pd.read_parquet(holidays_fpath)
        dt = getDate.query('sec_master')
        current_holidays = (holidays[(holidays['date'].dt.year >= dt.year)
                            & (holidays['date'].dt.date <= dt)])
        hol_list = current_holidays['date'].dt.date.tolist()
        # Business-day span of the whole run (same value on every row)
        (rows.insert(
            2, "date_range",
            rows.apply(lambda row: np.busday_count(rows['date'].min().date(),
                                                   rows['date'].max().date(),
                                                   holidays=hol_list),
                       axis=1)))

    return rows
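
# Quick check of np.busday_count as used in the calc_date_range block
# above: it counts business days in [begin, end), excluding any supplied
# holidays. The holiday below is an illustrative value.
import numpy as np
from datetime import date

hol_list = [date(2021, 11, 25)]
n = np.busday_count(date(2021, 11, 22), date(2021, 11, 29),
                    holidays=hol_list)
print(n)  # 4 -- Mon/Tue/Wed/Fri of that week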
try:
    from scripts.dev.multiuse.help_class import baseDir, getDate, write_to_parquet, help_print_arg
    from scripts.dev.data_collect.iex_class import urlData
    from scripts.dev.api import serverAPI
except ModuleNotFoundError:
    from multiuse.help_class import baseDir, getDate, write_to_parquet, help_print_arg
    from data_collect.iex_class import urlData
    from api import serverAPI

from importlib import reload
import sys
reload(sys.modules['multiuse.help_class'])

# %% codecell
pd.set_option('display.max_columns', 65)
pd.set_option('display.max_rows', 150)

# %% codecell
dt = getDate.query('iex_eod')
bpath = Path(baseDir().path, 'intraday', 'minute_1', str(dt.year))

all_syms = serverAPI('all_symbols').df
syms = all_syms['symbol'].unique()

# %% codecell
minute = 'minute_1'
# combine_all_intraday_data(minute='minute_1') is defined in full above
# %% codecell
# Flag the end of a run of consecutive rising closes on a >1% fChangeP
# day; store the index of the run's start (5 rows back). shift(k) is the
# close k rows back within the same symbol.
df_all['fourWC'] = np.where(
    ((df_all['symbol'] == df_all['prev_symbol'])
     & (df_all['fChangeP'] > .01)
     & (df_all['fClose'] > df_all['fClose'].shift(1, axis=0))
     & (df_all['fClose'].shift(1, axis=0) > df_all['fClose'].shift(2, axis=0))
     & (df_all['fClose'].shift(2, axis=0) > df_all['fClose'].shift(3, axis=0))
     & (df_all['fClose'].shift(3, axis=0) > df_all['fClose'].shift(4, axis=0))
     & (df_all['fClose'].shift(4, axis=0) > df_all['fClose'].shift(5, axis=0))),
    (df_all.index - 5), 0)

holidays_fpath = Path(baseDir().path, 'ref_data/holidays.parquet')
holidays = pd.read_parquet(holidays_fpath)
dt = getDate.query('sec_master')
current_holidays = (holidays[(holidays['date'].dt.year >= dt.year)
                             & (holidays['date'].dt.date <= dt)])
hol_list = current_holidays['date'].dt.date.tolist()

df_four = df_all[(df_all['fourWC'] != 0)].copy()
df_four.insert(0, 'prevSymDate', df_four['date'].shift(1, axis=0))
df_four.insert(3, 'prevSymSub', df_four['symbol'].shift(1, axis=0))

cols_to_keep = ['prevSymDate', 'date', 'symbol', 'prevSymSub', 'fourWC']
df_four_sub = (df_four[df_four['symbol'] == df_four['prevSymSub']]
               [cols_to_keep].copy())
(df_four_sub.insert(
    2, "dayDiff",
    df_four_sub.apply(lambda row: np.busday_count(
        row['prevSymDate'].date(), row['date'].date(),
        holidays=hol_list), axis=1)))
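
# Self-contained sketch of the chained shift() comparison above on a toy
# close series: shift(k) is the close k rows back, so the chained '>'
# terms flag a strictly rising run of closes ending at the current row.
import pandas as pd

s = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.2])
rising = ((s > s.shift(1)) & (s.shift(1) > s.shift(2))
          & (s.shift(2) > s.shift(3)) & (s.shift(3) > s.shift(4))
          & (s.shift(4) > s.shift(5)))
print(rising.tolist())  # True only at index 5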
importlib.reload(sys.modules['workbooks.fib_funcs'])
from workbooks.fib_funcs import read_clean_combined_all

df_all = read_clean_combined_all(local=True)
df_all['date'] = pd.to_datetime(df_all['date'])
df_2021 = df_all[df_all['date'].dt.year == 2021].copy(deep=True)

# %% codecell
# get_yf_loop_missing_hist(key='get_ignore_ytd')
all_syms = serverAPI('all_symbols').df
all_syms.columns

# %% codecell
dt = getDate.query('iex_previous')

url1 = 'ftp://ftp.nyxdata.com/'
url2 = 'cts_summary_files'
url3 = f"CTA.summary.{dt.strftime('%Y%m%d')}"
url = f"{url1}{url2}"
# requests can't fetch ftp:// urls, so use urllib instead
import shutil
import urllib.request as request
from contextlib import closing
from urllib.error import URLError

file = url3
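
# Hedged sketch of fetching the CTA summary file over FTP with the
# urllib/shutil imports above; whether the server still serves this
# path is not verified here.
def fetch_cta_summary(url, file):
    """Download the ftp file at {url}/{file} to the working directory."""
    try:
        with closing(request.urlopen(f"{url}/{file}")) as resp:
            with open(file, 'wb') as f:
                shutil.copyfileobj(resp, f)
    except URLError as e:
        print(f"FTP fetch failed: {e}")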
def read_clean_combined_all(local=False, dt=None, filter_syms=True):
    """Read, clean, and add columns to StockEOD combined all."""
    df_all = None
    cols_to_read = ['date', 'symbol', 'fOpen', 'fHigh', 'fLow',
                    'fClose', 'fVolume']

    if local:
        bpath = Path(baseDir().path, 'StockEOD/combined_all')
        fpath = get_most_recent_fpath(bpath)
        df_all = pd.read_parquet(fpath, columns=cols_to_read)
        if df_all['date'].dtype == 'object':
            df_all['date'] = pd.to_datetime(df_all['date'])
        df_all.drop_duplicates(subset=['symbol', 'date'], inplace=True)
    else:
        df_all = serverAPI('stock_close_cb_all').df
        df_all = df_all[cols_to_read]
        if filter_syms:
            all_cs_syms = remove_funds_spacs()
            df_all = df_all[df_all['symbol'].isin(all_cs_syms['symbol'])].copy()
        df_all['date'] = pd.to_datetime(df_all['date'])

        # Define base bpath for 2015-2020 stock data
        bpath = Path(baseDir().path, 'historical/each_sym_all')
        path = get_most_recent_fpath(
            bpath.joinpath('each_sym_all', 'combined_all'))
        df_hist = pd.read_parquet(path)
        # Combine 2015-2020 stock data with ytd
        df_all = pd.concat([df_hist, df_all]).copy()
        df_all.drop_duplicates(subset=['symbol', 'date'], inplace=True)
        df_all.reset_index(drop=True, inplace=True)

    if not dt:
        dt = getDate.query('iex_eod')
    # Exclude all dates from before this year
    df_all = (df_all[df_all['date'] >= str(dt.year)]
              .dropna(subset=['fVolume']).copy())

    # Get rid of all symbols that only have 1 day of data
    df_vc = df_all['symbol'].value_counts()
    df_vc_1 = df_vc[df_vc == 1].index.tolist()
    df_all = (df_all[~df_all['symbol'].isin(df_vc_1)]
              .reset_index(drop=True).copy())

    # Sort by symbol, date ascending
    df_all = df_all.sort_values(by=['symbol', 'date'], ascending=True)

    df_all['fRange'] = (df_all['fHigh'] - df_all['fLow'])
    df_all['vol/mil'] = (df_all['fVolume'].div(1000000))
    df_all['prev_close'] = df_all['fClose'].shift(periods=1, axis=0)
    df_all['prev_symbol'] = df_all['symbol'].shift(periods=1, axis=0)

    # Add fChangeP col
    print('Fib funcs: adding fChangeP column')
    df_all = add_fChangeP_col(df_all)

    # Add gap column
    print('Fib funcs: adding gap column')
    df_all = add_gap_col(df_all)

    # Add range of gap
    df_all['gRange'] = (np.where(df_all['prev_close'] < df_all['fLow'],
                                 df_all['fHigh'] - df_all['prev_close'],
                                 df_all['fHigh'] - df_all['fLow']))

    df_all['cumPerc'] = np.where(df_all['symbol'] == df_all['prev_symbol'],
                                 df_all['fChangeP'].cumsum(), np.NaN)
    df_all['perc5'] = np.where(df_all['symbol'] == df_all['prev_symbol'],
                               df_all['cumPerc'].shift(-5) - df_all['cumPerc'],
                               np.NaN)
    df_all['vol_avg_2m'] = np.where(df_all['symbol'] == df_all['prev_symbol'],
                                    df_all['fVolume'].rolling(60).mean(),
                                    np.NaN)

    # Add cumulative sum of last 5 fChangeP rows
    df_all['fCP5'] = (np.where(
        df_all['symbol'] == df_all['prev_symbol'],
        df_all['fChangeP'].rolling(min_periods=1, window=5).sum(), 0))

    df_all = df_all.copy()
    # Calc RSI and moving averages
    print('Fib Funcs: calc_rsi')
    df_all = calc_rsi(df_all)
    print('Fib Funcs: making_moving_averages')
    df_all = make_moving_averages(df_all)

    # fHighMax funcs
    print('Fib funcs: fHighMax')
    df_all = add_fHighMax_col(df_all).copy()

    df_all = df_all.sort_values(by=['symbol', 'date'], ascending=True)

    # Round float32 columns at float64 precision before downcasting
    float_32s = df_all.dtypes[df_all.dtypes == np.float32].index
    for col in float_32s:
        df_all[col] = df_all[col].astype(np.float64).round(3)

    df_all = dataTypes(df_all, parquet=True).df.copy()

    return df_all
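
# Usage sketch: build the enriched EOD frame and inspect one symbol.
# local=True reads the most recent combined_all parquet from disk; the
# default pulls from serverAPI. 'AAPL' is just an example symbol.
df_all = read_clean_combined_all(local=True)
df_aapl = df_all[df_all['symbol'] == 'AAPL']
print(df_aapl[['date', 'fClose', 'fChangeP', 'fCP5']].tail())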