def historical():
    """Historical EOD data."""
    bpath = Path(baseDir().path, 'StockEOD')
    historical = ({
        'combined': get_most_recent_fpath(bpath.joinpath('combined')),
        'combined_all': get_most_recent_fpath(bpath.joinpath('combined_all')),
        'combined_year': get_most_recent_fpath(bpath.joinpath('combined_year'))
    })

    return historical
def externals():
    """External information sources."""
    bpath = Path(baseDir().path)
    externals = ({
        'daily_breaker': get_most_recent_fpath(
            bpath.joinpath('short', 'daily_breaker'), f_pre='nasdaq'),
        'halts': get_most_recent_fpath(bpath.joinpath('short', 'halts'))
    })

    return externals
def company_stats():
    """Company stats."""
    bpath = Path(baseDir().path, 'company_stats')
    stats = ({
        'analyst_recs': bpath.joinpath('analyst_recs', '_2022.parquet'),
        'meta': get_most_recent_fpath(bpath.joinpath('meta', 'combined')),
        'stats': get_most_recent_fpath(bpath.joinpath('stats', 'combined'))
    })

    return stats
def sec():
    """SEC feeds."""
    bpath = Path(baseDir().path, 'sec')
    dt = getDate.query('iex_eod')
    yr = str(dt.year)
    sec = ({
        'rss': get_most_recent_fpath(bpath.joinpath('rss', yr)),
        'daily_idx': get_most_recent_fpath(bpath.joinpath('daily_index', yr)),
        'daily_idx_combined': bpath.joinpath('daily_index',
                                             '_all_combined.parquet')
    })

    return sec
def scans():
    """Scans for stocks/other items."""
    bpath = Path(baseDir().path, 'scans')
    scans = ({'top_vol': get_most_recent_fpath(bpath.joinpath('top_vol'))})

    return scans
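# Usage sketch (illustrative, not part of the original module): the helpers
# above return dicts of pathlib.Path objects pointing at the most recent
# parquet files, which a caller would typically read back with pandas.
# Assumes the files already exist on disk.
import pandas as pd

ext_paths = externals()
df_halts = pd.read_parquet(ext_paths['halts'])
print(df_halts.shape)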
def _get_missing_dates_df(cls, self, key):
    """Get missing dates."""
    key_options = ['previous', 'all', 'less_than_20']
    if str(key) not in key_options:
        self.proceed = False  # Provided key not in options

    bpath = Path(baseDir().path, 'StockEOD/missing_dates', key)
    path = get_most_recent_fpath(bpath)
    df_dates = pd.read_parquet(path)

    # Define path of null dates (dates already known to have no data)
    null_path = Path(baseDir().path, 'StockEOD/missing_dates/null_dates',
                     '_null_dates.parquet')

    # Get all data that isn't null/empty. Default to the raw missing dates so
    # df is always defined even when the null-dates file doesn't exist.
    df = df_dates
    if null_path.exists():
        null_df = pd.read_parquet(null_path)
        df = (pd.merge(df_dates, null_df, how='left', indicator=True)
                .query('_merge == "left_only"')
                .drop(columns=['_merge'])
                .copy())
        # If the merge removed everything, fall back to the raw dates
        if df.empty:
            df = df_dates

    self.null_dates = []
    self.merged_df = df
    self.missing_df = self._clean_process_missing(self, df)
    self.single_df, self.multiple_df = self._get_single_multiple_dfs(
        self, self.missing_df)
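# Minimal sketch of the anti-join used in _get_missing_dates_df: a left merge
# with indicator=True, keeping only rows flagged "left_only", drops every
# missing date that already appears in the null-dates list. Toy frames only;
# the column names here are illustrative, not the real schema.
import pandas as pd

missing = pd.DataFrame({'symbol': ['AAA', 'AAA', 'BBB'],
                        'date': ['2022-01-03', '2022-01-04', '2022-01-03']})
null_dates = pd.DataFrame({'symbol': ['AAA'], 'date': ['2022-01-04']})

kept = (pd.merge(missing, null_dates, how='left', indicator=True)
          .query('_merge == "left_only"')
          .drop(columns=['_merge']))
print(kept)  # AAA/2022-01-03 and BBB/2022-01-03 remain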
def stocktwits():
    """Stocktwits data."""
    bpath = Path(baseDir().path, 'stocktwits')
    stocktwits = ({
        'trending': get_most_recent_fpath(bpath.joinpath('trending'),
                                          f_pre='_')
    })

    return stocktwits
def intraday_tick():
    """Intraday tick data."""
    bpath_t = Path(baseDir().path, 'tickers', 'sectors')
    ticks = ({
        'sector_perf': get_most_recent_fpath(bpath_t, f_pre='performance'),
        'treasuries': Path(baseDir().path, 'economic_data',
                           'treasuries.parquet')
    })

    return ticks
def get_yf_loop_missing_hist(key='less_than_20', cs=False, sym_list=None,
                             verb=False, refresh_missing_dates=True):
    """Get less_than_20 syms and call GetYfMissingDates."""
    if sym_list:
        # Use the symbol list passed by the caller as-is
        if verb:
            help_print_arg('get_yf_loop_missing_hist: sym_list assumed')
    elif key == 'get_ignore_ytd':
        df_all = read_clean_combined_all()
        dt = getDate.query('iex_eod')
        df_year = df_all[df_all['date'].dt.year == dt.year].copy(deep=True)
        vc = df_year.value_counts(subset='symbol', ascending=False)
        # Symbols with fewer trading days than the rest, but more than zero
        syms_one_miss = vc[(vc < (vc.max() - 1)) & (vc > 0)].index
        sym_list = syms_one_miss.tolist()
        if verb:
            help_print_arg('get_yf_loop_missing_hist: '
                           'key==get_ignore_ytd : syms_one_miss')
    elif cs is True:
        if refresh_missing_dates:
            MissingHistDates(cs=True)
        bpath = Path(baseDir().path, "StockEOD/missing_dates/all")
        fpath = get_most_recent_fpath(bpath)
        df_dates = pd.read_parquet(fpath)
        # Get all symbols, reduced to common stock and ADRs
        sym_list = df_dates['symbol'].unique().tolist()
        if verb:
            help_print_arg('get_yf_loop_missing_hist: cs=True')
    else:
        if refresh_missing_dates:
            MissingHistDates()
        bpath = Path(baseDir().path, f"StockEOD/missing_dates/{key}")
        fpath = get_most_recent_fpath(bpath)
        df_dates = pd.read_parquet(fpath)
        sym_list = df_dates['symbol'].unique().tolist()
        if verb:
            help_print_arg('get_yf_loop_missing_hist: '
                           'sym_list from missing_dates/key')

    for sym in tqdm(sym_list):
        try:
            GetYfMissingDates(sym=sym)
        except Exception as e:
            help_print_arg(f"get_yf_loop_missing_hist error: {str(e)}")
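# Usage sketch (illustrative): the loop above can be started either with an
# explicit symbol list or by letting it derive one from the missing-dates
# parquet files. The symbols below are placeholders; both calls assume the
# data directories and the MissingHistDates/GetYfMissingDates classes exist.
get_yf_loop_missing_hist(sym_list=['AAPL', 'MSFT'], verb=True)
get_yf_loop_missing_hist(key='less_than_20', refresh_missing_dates=True)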
def get_cboe_ref(ymaster=False):
    """Get cboe reference data for use on yfinance."""
    df = None
    path = Path(baseDir().path, 'derivatives/cboe_symref')
    fpath = get_most_recent_fpath(path, f_pre='symref')
    df = pd.read_parquet(fpath)
    # cols_to_drop = ['Cboe Symbol', 'Closing Only']
    df = df.rename(columns={'Underlying': 'symbol'})
    # .drop(columns=cols_to_drop)

    if ymaster:
        df = pd.DataFrame(df['symbol'].unique(), columns=['symbol']).copy()

    return df
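# Usage sketch (illustrative): with ymaster=True the function reduces the CBOE
# symbol reference to one row per underlying symbol, which is the shape used
# when feeding symbols into yfinance loops. Assumes a symref parquet exists
# under derivatives/cboe_symref.
df_underlyings = get_cboe_ref(ymaster=True)
print(df_underlyings['symbol'].nunique())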
def last_bus_day_syms():
    """Read all symbols from the last business day."""
    sdir = Path(baseDir().path, 'tickers', 'new_symbols')
    fpath = get_most_recent_fpath(sdir, f_pre='_')

    sym_df = False
    if fpath.exists():
        sym_df = pd.read_parquet(fpath)
    else:
        fpath = sdir.parent.joinpath('symbol_list', 'all_symbols.parquet')
        if fpath.exists():
            sym_df = pd.read_parquet(fpath)
        else:
            sym_df = serverAPI('all_symbols').df
            write_to_parquet(sym_df, fpath)

    return sym_df
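# Usage sketch (illustrative): last_bus_day_syms falls back from the newest
# new_symbols file, to the all_symbols parquet, to the serverAPI call, so the
# caller always gets a symbol DataFrame back. Assumes at least one of those
# sources is reachable.
sym_df = last_bus_day_syms()
print(sym_df.shape)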
def warrants():
    """Warrant information/records."""
    bpath = Path(baseDir().path, 'tickers', 'warrants')
    warrants = ({
        'all': get_most_recent_fpath(bpath.joinpath('all')),
        'all_hist': get_most_recent_fpath(bpath.joinpath('all_hist')),
        'cheapest': get_most_recent_fpath(bpath.joinpath('cheapest')),
        'newest': get_most_recent_fpath(bpath.joinpath('newest')),
        'top_perf': get_most_recent_fpath(bpath.joinpath('top_perf')),
        'worst_perf': get_most_recent_fpath(bpath.joinpath('worst_perf'))
    })

    return warrants
from pathlib import Path

import numpy as np
import requests
import dask.dataframe as dd

try:
    from scripts.dev.multiuse.help_class import (baseDir, getDate,
                                                 write_to_parquet,
                                                 help_print_arg)
    from scripts.dev.multiuse.path_helpers import get_most_recent_fpath
except ModuleNotFoundError:
    from multiuse.help_class import (baseDir, getDate, write_to_parquet,
                                     help_print_arg)
    from multiuse.path_helpers import get_most_recent_fpath

# %% codecell

# def cboe_clean_symbol_ref
bpath = Path(baseDir().path, 'ref_data/yoptions_ref/cboe_ref_raw')
fpath = get_most_recent_fpath(bpath)

cols_to_read = ['OSI Symbol', 'Underlying']
df = dd.read_parquet(fpath, columns=cols_to_read)

# Strip spaces from the OSI symbol and dots from the underlying
df['OSI Symbol'] = df['OSI Symbol'].str.replace(' ', '')
df['Underlying'] = df['Underlying'].str.replace('.', '', regex=False)
df = df[df['Underlying'] != 'C']

# Option suffix = OSI symbol with the underlying prefix removed
df['sym_suf'] = df.apply(
    lambda row: row['OSI Symbol'].replace(row['Underlying'], ''),
    axis=1, meta=('sym_suf', 'object'))

df = df.assign(suf_temp=df['sym_suf'].str.replace(' ', '').str.replace(
def read_clean_combined_all(local=False, dt=None, filter_syms=True):
    """Read, clean, and add columns to StockEOD combined all."""
    df_all = None

    if local:
        bpath = Path(baseDir().path, 'StockEOD/combined_all')
        fpath = get_most_recent_fpath(bpath)
        cols_to_read = ['date', 'symbol', 'fOpen', 'fHigh',
                        'fLow', 'fClose', 'fVolume']
        df_all = pd.read_parquet(fpath, columns=cols_to_read)
        if df_all['date'].dtype == 'object':
            df_all['date'] = pd.to_datetime(df_all['date'])
        df_all.drop_duplicates(subset=['symbol', 'date'], inplace=True)
    else:
        cols_to_read = ['date', 'symbol', 'fOpen', 'fHigh',
                        'fLow', 'fClose', 'fVolume']
        df_all = serverAPI('stock_close_cb_all').df
        df_all = df_all[cols_to_read]

        if filter_syms:
            all_cs_syms = remove_funds_spacs()
            df_all = df_all[df_all['symbol'].isin(
                all_cs_syms['symbol'])].copy()

        df_all['date'] = pd.to_datetime(df_all['date'])

        # Define base bpath for 2015-2020 stock data
        bpath = Path(baseDir().path, 'historical/each_sym_all')
        path = get_most_recent_fpath(
            bpath.joinpath('each_sym_all', 'combined_all'))
        df_hist = pd.read_parquet(path)
        # Combine 2015-2020 stock data with ytd
        df_all = pd.concat([df_hist, df_all]).copy()
        df_all.drop_duplicates(subset=['symbol', 'date'], inplace=True)
        df_all.reset_index(drop=True, inplace=True)

    if not dt:
        dt = getDate.query('iex_eod')
    # Exclude all dates from before this year
    df_all = (df_all[df_all['date'] >= str(dt.year)]
              .dropna(subset=['fVolume'])
              .copy())

    # Get rid of all symbols that only have 1 day of data
    df_vc = df_all['symbol'].value_counts()
    df_vc_1 = df_vc[df_vc == 1].index.tolist()
    df_all = (df_all[~df_all['symbol'].isin(df_vc_1)]
              .reset_index(drop=True)
              .copy())

    # Sort by symbol, date ascending
    df_all = df_all.sort_values(by=['symbol', 'date'], ascending=True)
    df_all['fRange'] = (df_all['fHigh'] - df_all['fLow'])
    df_all['vol/mil'] = (df_all['fVolume'].div(1000000))
    df_all['prev_close'] = df_all['fClose'].shift(periods=1, axis=0)
    df_all['prev_symbol'] = df_all['symbol'].shift(periods=1, axis=0)

    # Add fChangeP col
    print('Fib funcs: adding fChangeP column')
    df_all = add_fChangeP_col(df_all)
    # Merge with df_all and resume
    # Add gap column
    print('Fib funcs: adding gap column')
    df_all = add_gap_col(df_all)

    # Add range of gap
    df_all['gRange'] = (np.where(
        df_all['prev_close'] < df_all['fLow'],
        df_all['fHigh'] - df_all['prev_close'],
        df_all['fHigh'] - df_all['fLow']))

    df_all['cumPerc'] = np.where(df_all['symbol'] == df_all['prev_symbol'],
                                 df_all['fChangeP'].cumsum(), np.NaN)
    df_all['perc5'] = np.where(df_all['symbol'] == df_all['prev_symbol'],
                               df_all['cumPerc'].shift(-5) - df_all['cumPerc'],
                               np.NaN)
    df_all['vol_avg_2m'] = np.where(df_all['symbol'] == df_all['prev_symbol'],
                                    df_all['fVolume'].rolling(60).mean(),
                                    np.NaN)
    # Add cumulative sum of last 5 fChangeP rows
    df_all['fCP5'] = (np.where(
        df_all['symbol'] == df_all['prev_symbol'],
        df_all['fChangeP'].rolling(min_periods=1, window=5).sum(), 0))

    df_all = df_all.copy()

    # Calc RSI and moving averages
    print('Fib Funcs: calc_rsi')
    df_all = calc_rsi(df_all)
    print('Fib Funcs: making_moving_averages')
    df_all = make_moving_averages(df_all)
    # fHighMax funcs
    print('Fib funcs: fHighMax')
    df_all = add_fHighMax_col(df_all).copy()

    df_all = df_all.sort_values(by=['symbol', 'date'], ascending=True)

    # Upcast float32 columns to float64 and round before compressing dtypes
    float_32s = df_all.dtypes[df_all.dtypes == np.float32].index
    for col in float_32s:
        df_all[col] = df_all[col].astype(np.float64).round(3)

    df_all = dataTypes(df_all, parquet=True).df.copy()

    return df_all
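# Usage sketch (illustrative): read the combined EOD frame from the local
# parquet store and inspect a few of the derived columns added above.
# Assumes the StockEOD/combined_all directory is populated.
df_all = read_clean_combined_all(local=True)
print(df_all[['symbol', 'date', 'fRange', 'gRange', 'fCP5', 'perc5']].tail())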