def analyze_hist_candles(df, bull=True, bear=False):
    """Filter historical candles to liquid common stocks and print patterns.

    Args:
        df: DataFrame of historical prices. The code reads the columns
            'symbol', 'date', 'fClose', 'fVolume', 'fHigh' and 'fLow'.
        bull: When truthy, pass the filtered frame (indexed by symbol)
            to ``bu_prints``.
        bear: When truthy, pass the filtered frame (indexed by symbol)
            to ``be_prints``.

    Returns:
        A new DataFrame (``df`` itself is not modified) with the extra
        columns 'fRange' and 'sym', sorted ascending by symbol and date.
    """
    # Symbols of type 'cs' (common stock) from the server's reference list
    all_symbols = serverAPI('all_symbols').df
    all_cs = all_symbols.loc[all_symbols['type'] == 'cs', 'symbol'].tolist()

    # Keep rows with close > $1.5, volume > 500,000 and a common-stock symbol.
    # One combined mask followed by a single copy means the column
    # assignments below act on an independent frame, not on a slice of a
    # slice (which triggers pandas' chained-assignment warning).
    keep = (
        (df['fClose'] > 1.5)
        & (df['fVolume'] > 500000)
        & df['symbol'].isin(all_cs)
    )
    df_sorted = df[keep].copy(deep=True)

    # High-low range of each candle
    df_sorted['fRange'] = (df_sorted['fHigh'] - df_sorted['fLow']).round(3)
    # Duplicate of the symbol column, still available once 'symbol'
    # becomes the index below
    df_sorted['sym'] = df_sorted['symbol']
    # Sort by symbol and date, ascending
    df_sorted = df_sorted.sort_values(by=['symbol', 'date'], ascending=True)

    if bull:
        bu_prints(df_sorted.set_index('symbol'))
    if bear:
        be_prints(df_sorted.set_index('symbol'))

    return df_sorted
def analyze_iex_ytd(start_year=2021):
    """Store the combined IEX close prices and return the recent rows.

    The full price table from ``serverAPI('stock_close_prices')`` is written
    to ``StockEOD/combined/_<latest date>.parquet`` under ``baseDir().path``.

    Args:
        start_year: First calendar year to keep in the returned frame.
            Defaults to 2021, the value that used to be hard-coded.

    Returns:
        A copy of the rows whose 'date' falls in ``start_year`` or later.
    """
    df_prices = serverAPI('stock_close_prices').df.copy()
    # 'date' is converted from epoch milliseconds
    df_prices['date'] = pd.to_datetime(df_prices['date'], unit='ms')

    # The file name carries the most recent date present in the data
    dt_max = df_prices['date'].max().date()
    path = Path(baseDir().path, 'StockEOD/combined', f"_{dt_max}.parquet")
    df_prices.to_parquet(path)

    recent = df_prices['date'].dt.year >= start_year
    return df_prices[recent].copy()
def get_clean_yoptions():
    """Get and clean yoptions data.

    Returns:
        The cleaned options DataFrame with two added columns: 'date'
        ('lastTradeDate' parsed as ``%Y-%m-%d`` and normalized to midnight)
        and 'symbol' (a copy of 'Underlying').

    Raises:
        Exception: Whatever ``clean_yfinance_options`` raised. The message
            is printed first and the original error is re-raised. Previously
            execution fell through to ``None['date'] = ...`` and surfaced as
            an unrelated TypeError.
    """
    df_temp = serverAPI('yoptions_all').df

    try:
        df_cleaned = clean_yfinance_options(df_temp=df_temp, refresh=True).copy()
    except Exception as e:
        print(str(e))
        # Nothing useful can be returned without the cleaned frame
        raise

    df_cleaned['date'] = (pd.to_datetime(df_cleaned['lastTradeDate'],
                                         format='%Y-%m-%d')
                          .dt.normalize())
    df_cleaned['symbol'] = df_cleaned['Underlying']

    return df_cleaned
def get_company_meta_data():
    """Fetch IEX company meta data per symbol and store it locally.

    Symbols of type 'cs' or 'ad' are taken from ``serverAPI('all_symbols')``.
    Each symbol's ``/stock/<sym>/company`` result is written to
    ``company_stats/meta/<first letter>/_<SYM>.parquet`` under
    ``baseDir().path``. A failure for one symbol is printed and the loop
    moves on to the next symbol.
    """
    symbol_ref = serverAPI('all_symbols').df
    wanted_types = symbol_ref['type'].isin(['cs', 'ad'])
    sym_list = symbol_ref[wanted_types]['symbol'].unique().tolist()

    bpath = Path(baseDir().path, 'company_stats/meta')

    for sym in tqdm(sym_list):
        try:
            ud = urlData(f"/stock/{sym}/company")
            # Files are bucketed by the lower-cased first letter of the symbol
            fpath_suf = f"{sym.lower()[0]}/_{sym}.parquet"
            write_to_parquet(ud.df, bpath / fpath_suf)
        except Exception as e:
            print(f"Company meta stats error: {type(e)} {str(e)}")
def get_all_max_hist(sym_list=False):
    """Get all max historical symbol data from IEX.

    For every symbol that has no local parquet file yet, the
    ``/stock/<sym>/chart/max`` endpoint is requested and the response is
    stored at ``<baseDir>/historical/2021/<first letter>/_<SYM>.parquet``.
    Existing files are only checked for presence, not for missing data.

    Args:
        sym_list: Iterable of symbols to fetch. When falsy, all symbols of
            type 'cs' from ``serverAPI('all_symbols')`` are used.

    Returns:
        dict with the keys:
            'hist_dict': always empty, kept for backward compatibility.
            'hist_list': symbols fetched and written successfully.
            'hists_checked': symbols skipped because a file already exists.
            'hist_errors_dict': symbol -> response object, or the exception
                when the request itself failed before a response existed.
            'hist_errors': symbols that failed.
    """
    load_dotenv()
    base_url = os.environ.get("base_url")
    base_path = f"{baseDir().path}/historical/2021"
    payload = {'token': os.environ.get("iex_publish_api"), 'chartByDay': True}

    if not sym_list:
        all_symbols = serverAPI('all_symbols').df
        all_syms = all_symbols[all_symbols['type'].isin(['cs'])]
        sym_list = all_syms['symbol'].tolist()

    hist_dict, hist_errors_dict = {}, {}
    hist_list, hists_checked, hist_errors = [], [], []

    for sym in tqdm(sym_list):
        fpath = f"{base_path}/{sym[0].lower()}/_{sym}.parquet"

        # Skip symbols whose local file already exists
        if os.path.exists(fpath):
            hists_checked.append(sym)
            continue

        url = f"{base_url}/stock/{sym}/chart/max"
        get = None
        try:
            # The request is inside the try so that one network failure
            # does not abort the whole run
            get = requests.get(url, params=payload)
            df = pd.DataFrame(get.json())
            df.to_parquet(fpath)
        except Exception as e:
            print(e)
            # `is not None` on purpose: a Response is falsy for 4xx/5xx
            hist_errors_dict[sym] = get if get is not None else e
            hist_errors.append(sym)
        else:
            # Recorded only after the file was actually written
            hist_list.append(sym)

    result = ({
        'hist_dict': hist_dict,
        'hist_list': hist_list,
        'hists_checked': hists_checked,
        'hist_errors_dict': hist_errors_dict,
        'hist_errors': hist_errors
    })

    return result
def treasuries_clean_write():
    """Build daily treasury averages and write them to a local parquet file.

    The 'time' column is first parsed as epoch milliseconds. Rows where
    that fails keep their original 'time' value and are handled by a second,
    default ``pd.to_datetime`` pass. The frame is then averaged per calendar
    date and written to ``economic_data/tz_daily.parquet`` under
    ``baseDir().path``.

    Returns:
        The per-date mean DataFrame, indexed by 'date'.
    """
    raw = serverAPI('treasuries').df
    raw['time_test'] = pd.to_datetime(raw['time'], unit='ms', errors='coerce')

    # Rows whose 'time' parsed as epoch ms: the parsed value becomes 'time'
    parsed = raw.dropna(subset=['time_test'])
    parsed = parsed.drop(columns=['time']).rename(columns={'time_test': 'time'})

    # All remaining rows (selected by index label) keep their raw 'time'
    leftover_mask = ~raw.index.isin(parsed.index)
    leftover = raw[leftover_mask].drop(columns=['time_test']).copy()

    tz = pd.concat([leftover, parsed])

    # NOTE(review): ^FVX, ^TNX and ^TYX are usually quoted as the 5-, 10- and
    # 30-year yields, so these labels look shifted - confirm before changing,
    # since readers of tz_daily.parquet depend on the column names.
    col_dict = ({'^IRX': 'ThreeM', '^FVX': 'ThreeY',
                 '^TNX': 'FiveY', '^TYX': 'TenY'})
    tz = tz.rename(columns=col_dict)

    tz['time'] = pd.to_datetime(tz['time'])
    tz['date'] = pd.to_datetime(tz['time'].dt.date)
    tz = tz.sort_values(by=['date'])

    tz_daily = tz.groupby(by=['date']).mean()

    path_to_write = Path(baseDir().path, 'economic_data/tz_daily.parquet')
    write_to_parquet(tz_daily, path_to_write)

    return tz_daily
def get_clean_all_st_data():
    """Get all stocktwits trend data and attach a per-day row count.

    Each row of ``serverAPI('st_trend_all')`` gets a 'date' column (its
    timestamp normalized to midnight) and a 'count' column holding the
    number of rows for the same symbol and date. 'timestamp' is renamed to
    'st_time' in the result.

    Returns:
        The merged DataFrame after a final ``dataTypes`` pass.

    Raises:
        Exception: Whatever the groupby raised. The message is printed first
            and the original error is re-raised. Previously the ``False``
            sentinel was used as a DataFrame and failed with an unrelated
            AttributeError.
    """
    st_data = serverAPI('st_trend_all').df
    st_data = dataTypes(st_data).df
    # 'timestamp' is converted from epoch milliseconds
    st_data['timestamp'] = pd.to_datetime(st_data['timestamp'], unit='ms')
    st_data['date'] = st_data['timestamp'].dt.normalize()

    try:
        st_group = st_data.groupby(by=['symbol', 'date']).count()
        st_na = st_group.dropna()
    except Exception as e:
        print(str(e))
        # The counts are required for the merge below
        raise

    # After count() every remaining column holds the same per-group row
    # count; 'id' is kept and renamed to 'count'
    st_counts = (st_na.reset_index()
                      .drop(columns=['watchlist_count', 'timestamp'])
                      .rename(columns={'id': 'count'}))

    st_all = pd.merge(st_data, st_counts, on=['symbol', 'date'])
    st_all.rename(columns={'timestamp': 'st_time'}, inplace=True)
    st_all = dataTypes(st_all).df

    return st_all
# Re-import the project modules so edits made during the session take effect.
# Order matches the original sequence of reload calls.
for _mod_name in (
    'data_collect.iex_class',
    'data_collect.yfinance_funcs',
    'api',
    'master_funcs.master_iex_stats',
    'data_collect.sec_rss',
):
    importlib.reload(sys.modules[_mod_name])

# %% codecell
##########################################
# Each cell below sends one 'redo' request to the server with the given `val`

serverAPI('redo', val='clear_yoptions_temp_unfin')
# serverAPI('redo', val='make_yoptions_file_struct')

# %% codecell
serverAPI('redo', val='master_yfinance_options_collect')

# %% codecell
serverAPI('redo', val='master_yfinance_options_followup')

# %% codecell
serverAPI('redo', val='yoptions_combine_last')

# %% codecell
serverAPI('redo', val='combine_yoptions_combine_all')

# %% codecell
serverAPI('redo', val='combine_yoptions_all')

# %% codecell
serverAPI('redo', val='yoptions_combine_temp_all')

# %% codecell
serverAPI('redo', val='yoptions_drop_hist_dupes')
import pandas as pd
import numpy as np

# Import once so the module is present in sys.modules, reload it, then
# re-import so the names point at the reloaded classes.
from data_collect.sec_rss import SecRssFeed, AnalyzeSecRss
importlib.reload(sys.modules['data_collect.sec_rss'])
from data_collect.sec_rss import SecRssFeed, AnalyzeSecRss

from api import serverAPI
# %% codecell

srf = SecRssFeed()
srf_df = srf.df

all_syms = serverAPI('all_symbols').df
ocgn_df = all_syms[all_syms['symbol'] == 'OCGN']

srf_df.info()

srf_df['dt'] = pd.to_datetime(srf_df['pubDate'])
# Despite the name, this is the wall-clock time 60 minutes ago
prev_15 = (datetime.now() - timedelta(minutes=60)).time()

# Feed items published today, later than the cutoff time above
sec_df = (srf_df[(srf_df['dt'].dt.time > prev_15)
          & (srf_df['dt'].dt.date == date.today())].copy())

sec_df

# Feed items whose CIK matches the first OCGN row of the symbol reference
srf_df[srf_df['CIK'] == ocgn_df['cik'].iloc[0]]

# Was `srf_df.df`: srf_df is already the DataFrame, so the extra attribute
# lookup raised AttributeError.
srf_df

# %% codecell
from zipfile import ZipFile

# Extract the archive `f` into `dir_to_extract`. The handle is named `zf`
# instead of `zip` so the builtin zip() is not rebound at module scope for
# every later cell.
with ZipFile(f, 'r') as zf:
    zf.extractall(dir_to_extract)

# %% codecell
#########################################################
# Form 13G 13G/A 13D/A

# sec_idx = serverAPI(which='redo', val='sec_idx_master')
# sec_inst = serverAPI(which='sec_inst_holdings')
# iex_close = serverAPI(which='redo', val='iex_close')

# Both 'redo' results are bound to the same name; only the second survives
# until `sec_master` is rebound again further down.
sec_master = serverAPI(which='redo', val='sec_idx_master')
sec_master = serverAPI(which='redo', val='combine_all_sec_masters')
sec_masters = serverAPI(which='sec_master_all').df

# NOTE(review): the only visible assignment of `sec_inst` is commented out
# above - this line needs it to be defined in an earlier cell.
sec_inst.df.shape

""" OCGNs merger agreement https://fintel.io/doc/sec-hsgx-histogenics-8k-2019-april-08-17994 """

sec_master = secMasterIdx()
sec_df = sec_master.df.copy(deep=True)
sec_df.shape

sec_df.dtypes

sec_df['Form Type'].value_counts()
col_dict = ({'^IRX': 'ThreeM', '^FVX': 'ThreeY', '^TNX': 'FiveY', '^TYX': 'TenY'}) tz.rename(columns=col_dict, inplace=True) tz['time'] = pd.to_datetime(tz['time']) tz['date'] = pd.to_datetime(tz['time'].dt.date) tz = tz.sort_values(by=['date']) tz_daily = tz.groupby(by=['date']).mean() path_to_write = Path(baseDir().path, 'economic_data/tz_daily.parquet') write_to_parquet(tz_daily, path_to_write) return tz_daily # %% codecell yoptions_all = serverAPI('yoptions_all').df shape = yoptions_all.shape[0] for col in yoptions_all.columns: na = yoptions_all[col].isna().sum() if (na / shape) > .2: print(f"Column {col}: has {na}s {str(round(na / shape, 2))}") try: inf = yoptions_all[np.isfinite(yoptions_all[col]) == False].shape[0] if inf > 0: print(f"{col} {inf}") except TypeError: pass # %% codecell
df_test

# %% codecell

# Open question: correlate on percentage returns, or apply a logarithm to
# the prices first to flatten the noise. My guess is that the log transform
# is the better idea.

# Standardize the log prices: subtract the mean, divide by the square root
# of the variance
logprice_mean = np.mean(logprice)
logprice_spread = np.sqrt(np.var(logprice))
scaled_price = (logprice - logprice_mean) / logprice_spread

# %% codecell

fpath = Path(baseDir().path, 'ref_data', 'peer_list', '_peers.parquet')
df_peers = pd.read_parquet(fpath)

# Attach each symbol's 'type' from the symbol reference; left join keeps
# every peer row
all_syms = serverAPI('all_symbols').df
symbol_types = all_syms[['symbol', 'type']]
df_peers = pd.merge(df_peers, symbol_types, on='symbol', how='left')

# Two successive mask calls on the 'corr' column
df_peers = df_peers.mask('corr', .95, lesser=True)
df_peers = df_peers.mask('corr', -.95, greater=True)

# %% codecell

df_peers_idx = df_peers.set_index(['key', 'type'])

df_peers

df_peers[df_peers['key'] == 'CYBN']
"""A file dedicated to finding SEC special dividend announcements.""" # %% codecell from tqdm import tqdm from pathlib import Path import pandas as pd import requests import os from api import serverAPI from multiuse.help_class import getDate from multiuse.create_file_struct import makedirs_with_permissions # %% codecell # This is purely for name lookup sec_ref_data = serverAPI('sec_ref').df sec_ref_data.head(5) sec_ref = serverAPI('sec_master_all').df sec_df = sec_ref.copy() sec_df['Date Filed'] = pd.to_datetime(sec_df['Date Filed'], format='%Y%m%d') # sec_df.groupby('Date Filed').filter(lambda file: sec_df[file] == '8-K') sec_df['Form Type'].value_counts() bus_days = getDate.get_bus_days() busdays_2021 = (bus_days[(bus_days['date'] >= '2021') & (bus_days['date'] <= str(getDate.query('iex_eod')))])
# (5,100) or 5/45 or 10% of signals are profitable using the pc_10

df_short.head()

# Top 50 rows by 'side_pc10+' then 'vol/avg', one row per (symbol, vol/avg).
# set_index takes the keys as ONE list; the original passed 'symbol' as a
# second positional argument, which pandas reads as `drop`, not as a key.
(df_short.reset_index()
 .drop_duplicates(subset=['symbol', 'vol/avg'])
 .sort_values(by=['side_pc10+', 'vol/avg'], ascending=False)
 .set_index(['date', 'symbol'])
 .head(50))

# df.sort_values(by=['side_pc10+', 'vol/avg'], ascending=False).head(50)

# %% codecell
##############################################################

cboe_df['date_dt'] = pd.to_datetime(cboe_df['dataDate'])
# `bs` is an offset defined in another cell
cboe_df['date_df'] = (cboe_df['date_dt'] + bs).dt.date

my_watch = serverAPI('st_watch').df.T
my_syms = my_watch['symbols'].values.tolist()

# %% codecell
##############################################################
# Threading

import logging
import threading
import time
import concurrent.futures


def thread_function(name):
    """Log that thread `name` is starting, then sleep for two seconds."""
    logging.info("Thread %s: starting", name)
    time.sleep(2)
pd.set_option('display.max_columns', None) # Display maximum rows pd.set_option('display.max_rows', 500) # %% codecell ######################################################## import glob import threading base_dir = baseDir().path fpath = f"{base_dir}/derivatives/iex_symref/**" paths = glob.glob(fpath, recursive=True) view_symref = IexOptionSymref('VIEW') all_syms = serverAPI('all_symbols').df all_cs = all_syms[all_syms['type'] == 'cs'] all_cs.shape all_cs_sym = all_cs['symbol'].tolist() for sym in all_cs_sym: th = threading.Thread(target=IexOptionSymref, args=(sym,)) th.start() all_syms.head(10) def iex_options_symbol_ref(): """Add tasks to queue to execute.""" syms_fpath = f"{base_dir}/tickers/all_symbols.gz" all_syms = pd.read_json(syms_fpath, compression='gzip')
break df = regStudies(df).df df = makeDrawings(df).df df['localMin_5'].value_counts() df['localMin_10'].value_counts() df.head(10) df.shape # %% codecell ############################################# df = serverAPI('iex_quotes_raw').df df.shape iex_df = dataTypes(df).df # 27 mbs with data type adjustments iex_df.info(memory_usage='deep') # 154 mbs without data type adjustments df.info(memory_usage='deep') import numpy as np np.finfo('float32') np.finfo('float16') np.finfo('float16').max np.finfo('float32').max # %% codecell
report_date = getDate.which_fname_date()
# The looked-up date is immediately replaced by a fixed one
report_date = datetime.date(2021, 3, 26)

td_vol = tradeVolume(report_date, 'con_volume', fresh=True).vol_df
td_vol_last = td_vol.copy(deep=True)

td_vol_last.sort_values(by=['mQuant'], ascending=False).head(100)

td_vol_last.head(10)

cboe_last = serverAPI('cboe_mmo_exp_last').df
# Take the entry under the first key only
for key in cboe_last.keys():
    cboe_df = cboe_last[key]
    break

# Align the column names with cboe_df for the merge, then drop unused columns
merge_names = {'contdate': 'expDate', 'underlying': 'Underlying'}
td_vol_last = td_vol_last.rename(columns=merge_names)
td_vol_last = td_vol_last.drop(columns=['pkind', 'exId'])

my_watch = serverAPI('st_watch').df.T
my_watch_syms = my_watch['symbols'].unique()

both_df = pd.merge(td_vol_last, cboe_df,
                   how='inner', on=['Underlying', 'expDate'])
both_df.head(10)

# Restrict the merged rows to underlyings on the watch list
on_watch = both_df['Underlying'].isin(my_watch_syms.tolist())
my_syms_df = both_df[on_watch].copy(deep=True)
from multiuse.help_class import df_create_bins # %% codecell from master_funcs.yoptions_master import SetUpYahooOptions, yoptions_combine_temp_all importlib.reload(sys.modules['master_funcs.yoptions_master']) from master_funcs.yoptions_master import SetUpYahooOptions, yoptions_combine_temp_all from data_collect.yfinance_get_options import yahoo_options importlib.reload(sys.modules['data_collect.yfinance_get_options']) from data_collect.yfinance_get_options import yahoo_options # %% codecell # %% codecell from api import serverAPI serverAPI('redo', val='yoptions_combine_temp_all') # 1. starts with SetUpYahooOptions # I also need to set up a prefork class for celery def execute_yahoo_options(df): """Execute for loop. Run from tasks execute_function.""" df = pd.read_json(df) for index, row in df.iterrows(): yahoo_options(row['symbol'], proxy=row['proxy']) def yahoo_options(sym, proxy=False, n=False): """Get options chain data from yahoo finance.""" dt = getDate.query('iex_eod')
# %% codecell ################################## # Daily treasury report url = "https://fsapps.fiscal.treasury.gov/dts/files/21091000.xlsx" url1 = "https://fsapps.fiscal.treasury.gov/dts/files/21091000.txt" # %% codecell ################################## # %% codecell ################################## serverAPI('redo', val='create_sec_rss_hist') # %% codecell ################################## fpath = "/Users/unknown1/Algo/data/iex_eod_quotes/combined/_2021_all_2021-07-16.gz" df = pd.read_json(fpath, compression='gzip') #df = pd.read_csv(fpath, compression='gzip') #df = pd.read_csv(fpath, compression='gzip', usecols=cols_to_keep) cols_to_keep = (['symbol', 'open', 'close', 'high', 'highTime', 'low', 'lowTime', 'latestUpdate', 'previousClose', 'previousVolume', 'change', 'changePercent', 'volume', 'avgTotalVolume',