class MarketData:
    """Thin cache layer over CSV files for symbols and dividend history.

    Wires together the project's file helpers (FileWriter, FileReader,
    PathFinder); all reads come from previously cached CSVs.
    """

    def __init__(self, broker=None):
        # `broker` is accepted for interface compatibility but is not
        # used by this cache-only implementation.
        self.writer = FileWriter()
        self.reader = FileReader()
        self.finder = PathFinder()

    def get_symbols(self):
        """Return the cached list of ticker symbols.

        Reads the symbols CSV located by PathFinder and extracts the
        C.SYMBOL column as a plain list.
        """
        symbols_path = self.finder.get_symbols_path()
        return list(self.reader.load_csv(symbols_path)[C.SYMBOL])

    def get_dividends(self, symbol, timeframe=None):
        """Return the cached dividend DataFrame for `symbol`.

        `timeframe` is accepted for interface compatibility but is not
        used to filter the cached data here.
        """
        return self.reader.load_csv(self.finder.get_dividends_path(symbol))

    def save_dividends(self, symbol, timeframe=None):
        """Persist the dividend history for `symbol` to its CSV cache.

        Fix: the original branched on `timeframe` only to make the same
        call with or without the argument — passing `timeframe` through
        unconditionally is equivalent (its default is None and
        get_dividends ignores it), so the redundant if/else is removed.
        """
        df = self.get_dividends(symbol, timeframe)
        self.writer.update_csv(self.finder.get_dividends_path(symbol), df)
class Robinhood:
    """Broker operations backed by the robin_stocks (`rh`) API.

    Handles login on construction, then exposes helpers to pull
    portfolio positions, company names, and historical price bars.
    """

    def __init__(self, usr=None, pwd=None, mfa=None):
        # Authentication: explicit arguments win over environment
        # variables; the TOTP code is generated from RH_2FA when no
        # one-time code is supplied.
        load_dotenv()
        username = usr or os.environ['RH_USERNAME']
        password = pwd or os.environ['RH_PASSWORD']
        mfa_code = mfa or pyotp.TOTP(os.environ['RH_2FA']).now()
        rh.login(username, password, mfa_code=mfa_code)
        self.api = rh
        self.writer = FileWriter()
        self.reader = FileReader()
        self.finder = PathFinder()

    def get_symbols_from_instruments(self, instruments):
        """Map a list of instrument URLs to their ticker symbols."""
        return [
            self.api.get_symbol_by_url(instrument)
            for instrument in instruments
        ]

    def flatten(self, xxs):
        """Flatten a 2-D list into a 1-D list (row-major order)."""
        return [x for xs in xxs for x in xs]

    def get_hists(self, symbols, span='year', interval='day', save=False):
        """Return a DataFrame of historical bars for `symbols`.

        Symbols with no data (the API returns `[None]`) are dropped.
        If `save is True`, the result is also written to
        'data/data.csv'.

        Fix: guard the all-empty case — the original indexed
        df['begins_at'] on an empty frame and raised KeyError when
        every symbol came back without history.
        """
        hists = [
            self.api.get_stock_historicals(symbol, interval, span)
            for symbol in symbols
        ]
        clean = [hist for hist in hists if hist != [None]]
        if not clean:
            return pd.DataFrame()
        df = pd.DataFrame.from_records(self.flatten(clean))
        # look into diff b/w tz_localize and tz_convert w param
        # 'US/Eastern' — ideally store utc time
        df['begins_at'] = pd.to_datetime(
            df['begins_at']).apply(lambda x: x.tz_localize(None))
        if save is True:
            self.writer.save_csv('data/data.csv', df)
        return df

    def get_names(self, symbols):
        """Map a list of stock symbols to company names."""
        return [self.api.get_name_by_symbol(symbol) for symbol in symbols]

    def save_symbols(self):
        """Save all portfolio symbols (with names) to the symbols CSV.

        Lazily loads the portfolio first if it hasn't been loaded yet.
        """
        if not hasattr(self, 'symbols'):
            self.load_portfolio()
        symbols = list(self.symbols)
        names = self.get_names(symbols)
        df = pd.DataFrame({C.SYMBOL: symbols, C.NAME: names})
        self.writer.update_csv(self.finder.get_symbols_path(), df)

    def load_portfolio(self):
        """Fetch positions/holdings and cache lookup tables + history.

        Populates self.positions, self.holdings, self.instruments
        (instrument URL -> symbol), self.symbols (symbol -> instrument
        URL), and self.hist (historical bars for every held symbol).
        """
        start = time.time()
        # Data acquisition
        self.positions = self.api.get_all_positions()
        self.holdings = self.api.build_holdings()
        # Create lookup table instrument -> symbol and vice versa
        instruments = [position['instrument']
                       for position in self.positions]
        symbols = self.get_symbols_from_instruments(instruments)
        self.instruments = dict(zip(instruments, symbols))
        self.symbols = dict(map(reversed, self.instruments.items()))
        # Get historical data for all instruments
        self.hist = self.get_hists(symbols)
        end = time.time()
        print(f'Successfully loaded portfolio in {round(end-start, 2)}s.')
class MarketData:
    """Cached market-data layer over provider CSVs (default 'iexcloud').

    Responsibilities:
      - load cached dividend / split / OHLC / sentiment / intraday /
        macro series, optionally filtered to a timeframe
      - standardize raw provider payloads into the project's column
        schema (C.* constants)
      - persist refreshed series back to disk (the save_* methods)
    """

    def __init__(self):
        # Load provider credentials/config from config.env.
        load_dotenv(find_dotenv('config.env'))
        self.writer = FileWriter()
        self.reader = FileReader()
        self.finder = PathFinder()
        self.traveller = TimeTraveller()
        self.provider = 'iexcloud'

    def try_again(self, func, **kwargs):
        """Call `func` with retries.

        'retries' and 'delay' are consumed from kwargs (defaulting to
        C.DEFAULT_RETRIES / C.DEFAULT_DELAY); all remaining kwargs are
        forwarded to `func`. The last exception is re-raised once the
        retry budget is exhausted.
        """
        retries = (kwargs['retries']
                   if 'retries' in kwargs
                   else C.DEFAULT_RETRIES)
        delay = (kwargs['delay'] if 'delay' in kwargs else C.DEFAULT_DELAY)
        func_args = {
            k: v for k, v in kwargs.items()
            if k not in {'retries', 'delay'}
        }
        for retry in range(retries):
            try:
                return func(**func_args)
            except Exception as e:
                if retry == retries - 1:
                    # out of retries: surface the final failure
                    raise e
                else:
                    sleep(delay)

    def get_symbols(self):
        """Return the cached list of symbols (C.SYMBOL column)."""
        symbols_path = self.finder.get_symbols_path()
        return list(self.reader.load_csv(symbols_path)[C.SYMBOL])

    def get_dividends(self, symbol, timeframe='max'):
        """Return the cached dividend DataFrame for `symbol`,
        filtered to `timeframe` by the ex-date column (C.EX)."""
        df = self.reader.load_csv(
            self.finder.get_dividends_path(symbol, self.provider))
        filtered = self.reader.data_in_timeframe(df, C.EX, timeframe)
        return filtered

    def standardize(self, df, full_mapping, filename, columns, default):
        """Rename provider columns per `full_mapping`, merge with the
        cached file, sort by time, and coerce value columns to float.

        `columns` is [time_col, *val_cols]; `default` replaces falsy
        values in the value columns. Columns absent from `df` are
        silently dropped from the mapping.
        """
        mapping = {k: v for k, v in full_mapping.items() if k in df}
        df = df[list(mapping)].rename(columns=mapping)
        time_col, val_cols = columns[0], columns[1:]
        # only merge/coerce when the expected schema is present
        if time_col in df and set(val_cols).issubset(df.columns):
            df = self.reader.update_df(
                filename, df, time_col).sort_values(by=[time_col])
            # since time col is pd.datetime,
            # consider converting to YYYY-MM-DD str format
            for val_col in val_cols:
                df[val_col] = df[val_col].apply(
                    lambda val: float(val) if val else default)
        return df

    def standardize_dividends(self, symbol, df):
        """Standardize a raw dividend payload into the cached schema."""
        full_mapping = dict(
            zip(
                ['exDate', 'paymentDate', 'declaredDate', 'amount'],
                [C.EX, C.PAY, C.DEC, C.DIV]
            )
        )
        filename = self.finder.get_dividends_path(symbol, self.provider)
        return self.standardize(
            df, full_mapping, filename, [C.EX, C.DIV], 0)

    def save_dividends(self, **kwargs):
        """Save the dividend history for kwargs['symbol'].

        Removes the cached file, rebuilds it from get_dividends, and
        returns the filename if the write succeeded (else None).
        """
        symbol = kwargs['symbol']
        filename = self.finder.get_dividends_path(symbol, self.provider)
        if os.path.exists(filename):
            os.remove(filename)
        df = self.reader.update_df(
            filename, self.get_dividends(**kwargs), C.EX, C.DATE_FMT)
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename

    def get_splits(self, symbol, timeframe='max'):
        """Return the cached splits DataFrame for `symbol`,
        filtered to `timeframe` by the ex-date column (C.EX)."""
        df = self.reader.load_csv(
            self.finder.get_splits_path(symbol, self.provider))
        filtered = self.reader.data_in_timeframe(df, C.EX, timeframe)
        return filtered

    def standardize_splits(self, symbol, df):
        """Standardize a raw splits payload into the cached schema.

        Note: default for the ratio column is 1 (a no-op split), not 0.
        """
        full_mapping = dict(
            zip(
                ['exDate', 'paymentDate', 'declaredDate', 'ratio'],
                [C.EX, C.PAY, C.DEC, C.RATIO]
            )
        )
        filename = self.finder.get_splits_path(symbol, self.provider)
        return self.standardize(
            df, full_mapping, filename, [C.EX, C.RATIO], 1)

    def save_splits(self, **kwargs):
        """Save the splits history for kwargs['symbol'];
        returns the filename on success (else None)."""
        symbol = kwargs['symbol']
        filename = self.finder.get_splits_path(symbol, self.provider)
        if os.path.exists(filename):
            os.remove(filename)
        df = self.reader.update_df(
            filename, self.get_splits(**kwargs), C.EX, C.DATE_FMT)
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename

    def standardize_ohlc(self, symbol, df, filename=None):
        """Standardize a raw OHLC payload into the cached schema.

        Volume and trade-count columns are additionally coerced to int
        (nulls become 0). `filename` overrides the default cache path.
        """
        full_mapping = dict(
            zip(
                ['date', 'open', 'high', 'low', 'close',
                 'volume', 'average', 'trades'],
                [C.TIME, C.OPEN, C.HIGH, C.LOW, C.CLOSE,
                 C.VOL, C.AVG, C.TRADES]
            )
        )
        filename = filename or self.finder.get_ohlc_path(
            symbol, self.provider)
        df = self.standardize(
            df, full_mapping, filename,
            [C.TIME, C.OPEN, C.HIGH, C.LOW, C.CLOSE], 0)
        for col in [C.VOL, C.TRADES]:
            if col in df:
                df[col] = df[col].apply(
                    lambda val: 0 if pd.isnull(val) else int(val))
        return df

    def get_ohlc(self, symbol, timeframe='max'):
        """Return the cached OHLC DataFrame for `symbol`,
        filtered to `timeframe` by C.TIME."""
        df = self.reader.load_csv(
            self.finder.get_ohlc_path(symbol, self.provider))
        filtered = self.reader.data_in_timeframe(df, C.TIME, timeframe)
        return filtered

    def save_ohlc(self, **kwargs):
        """Save OHLC data for kwargs['symbol'];
        returns the filename on success (else None)."""
        symbol = kwargs['symbol']
        filename = self.finder.get_ohlc_path(symbol, self.provider)
        if os.path.exists(filename):
            os.remove(filename)
        df = self.reader.update_df(
            filename, self.get_ohlc(**kwargs), C.TIME, C.DATE_FMT)
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename

    def get_social_sentiment(self, symbol, timeframe='max'):
        """Return cached social sentiment (time, bullish, bearish)
        for `symbol`, filtered to `timeframe`."""
        df = self.reader.load_csv(self.finder.get_sentiment_path(symbol))
        filtered = self.reader.data_in_timeframe(
            df, C.TIME, timeframe)[[C.TIME, C.POS, C.NEG]]
        return filtered

    def get_social_volume(self, symbol, timeframe='max'):
        """Return cached social volume (time, volume score, delta)
        for `symbol`, filtered to `timeframe`."""
        df = self.reader.load_csv(self.finder.get_sentiment_path(symbol))
        filtered = self.reader.data_in_timeframe(
            df, C.TIME, timeframe)[[C.TIME, C.VOL, C.DELTA]]
        return filtered

    def save_social_sentiment(self, **kwargs):
        """Save social sentiment + volume for kwargs['symbol'].

        Sentiment and volume frames are merged (outer, on C.TIME) when
        both are non-empty; if both are empty nothing is written and
        None is returned. Returns the filename on success.
        """
        symbol = kwargs['symbol']
        filename = self.finder.get_sentiment_path(symbol)
        if os.path.exists(filename):
            os.remove(filename)
        sen_df = self.reader.update_df(
            filename, self.get_social_sentiment(**kwargs), C.TIME)
        # NOTE(review): indexing with a set relies on pandas accepting
        # set-like keys — confirm against the pinned pandas version
        sen_df = sen_df[{C.TIME, C.POS, C.NEG}.intersection(sen_df.columns)]
        vol_df = self.reader.update_df(
            filename, self.get_social_volume(**kwargs), C.TIME)
        vol_df = vol_df[{C.TIME, C.VOL, C.DELTA}.intersection(vol_df.columns)]
        if sen_df.empty and not vol_df.empty:
            df = vol_df
        elif not sen_df.empty and vol_df.empty:
            df = sen_df
        elif not sen_df.empty and not vol_df.empty:
            df = sen_df.merge(vol_df, how="outer", on=C.TIME)
        else:
            # both empty: nothing to persist
            return
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename

    def standardize_sentiment(self, symbol, df):
        """Standardize a raw sentiment payload (timestamp/bullish/
        bearish) into the cached schema.

        NOTE(review): get_sentiment_path is called here with
        `self.provider` but without it in get_social_sentiment —
        confirm the intended signature.
        """
        full_mapping = dict(
            zip(['timestamp', 'bullish', 'bearish'],
                [C.TIME, C.POS, C.NEG])
        )
        filename = self.finder.get_sentiment_path(symbol, self.provider)
        df = self.standardize(
            df, full_mapping, filename, [C.TIME, C.POS, C.NEG], 0)
        return df[{C.TIME, C.POS, C.NEG}.intersection(df.columns)]

    def standardize_volume(self, symbol, df):
        """Standardize a raw social-volume payload (timestamp/
        volume_score/volume_change) into the cached schema."""
        full_mapping = dict(
            zip(['timestamp', 'volume_score', 'volume_change'],
                [C.TIME, C.VOL, C.DELTA])
        )
        filename = self.finder.get_sentiment_path(symbol, self.provider)
        df = self.standardize(
            df, full_mapping, filename, [C.TIME, C.VOL, C.DELTA], 0)
        return df[{C.TIME, C.VOL, C.DELTA}.intersection(df.columns)]

    def get_intraday(self, symbol, min=1, timeframe='max', extra_hrs=False):
        """Yield one cached intraday DataFrame per date in `timeframe`.

        NOTE(review): `min` shadows the builtin and, like `extra_hrs`,
        is currently unused — kept for interface compatibility.
        TODO: transform the 1-min dataset to 5/30/60-min bars
        (flexible resolution) and support market-hours-only filtering.
        """
        dates = self.traveller.dates_in_range(timeframe)
        for date in dates:
            df = self.reader.load_csv(
                self.finder.get_intraday_path(symbol, date, self.provider))
            yield self.reader.data_in_timeframe(df, C.TIME, timeframe)

    def save_intraday(self, **kwargs):
        """Save intraday data for kwargs['symbol'], one file per date.

        Returns the list of filenames that were written successfully.
        """
        symbol = kwargs['symbol']
        dfs = self.get_intraday(**kwargs)
        filenames = []
        for df in dfs:
            # each df covers a single date; derive the cache path from
            # its first timestamp
            date = df[C.TIME].iloc[0].strftime(C.DATE_FMT)
            filename = self.finder.get_intraday_path(
                symbol, date, self.provider)
            if os.path.exists(filename):
                os.remove(filename)
            save_fmt = f'{C.DATE_FMT} {C.TIME_FMT}'
            df = self.reader.update_df(filename, df, C.TIME, save_fmt)
            self.writer.update_csv(filename, df)
            if os.path.exists(filename):
                filenames.append(filename)
        return filenames

    def get_unemployment_rate(self, timeframe='max'):
        """Return the cached unemployment-rate DataFrame,
        filtered to `timeframe`."""
        df = self.reader.load_csv(self.finder.get_unemployment_path())
        filtered = self.reader.data_in_timeframe(df, C.TIME, timeframe)
        return filtered

    def standardize_unemployment(self, df):
        """Standardize a raw unemployment payload (time/value)
        into the cached schema."""
        full_mapping = dict(zip(['time', 'value'], [C.TIME, C.UN_RATE]))
        filename = self.finder.get_unemployment_path()
        return self.standardize(
            df, full_mapping, filename, [C.TIME, C.UN_RATE], 0)

    def save_unemployment_rate(self, **kwargs):
        """Save the unemployment-rate series (monthly '%Y-%m' keys);
        returns the filename on success (else None)."""
        filename = self.finder.get_unemployment_path()
        if os.path.exists(filename):
            os.remove(filename)
        df = self.reader.update_df(
            filename, self.get_unemployment_rate(**kwargs),
            C.TIME, '%Y-%m')
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename

    def standardize_s2f_ratio(self, df):
        """Standardize a raw stock-to-flow ratio payload
        (t / o.daysTillHalving / o.ratio) into the cached schema."""
        full_mapping = dict(
            zip(['t', 'o.daysTillHalving', 'o.ratio'],
                [C.TIME, C.HALVING, C.RATIO])
        )
        filename = self.finder.get_s2f_path()
        df = self.standardize(
            df, full_mapping, filename, [C.TIME, C.HALVING, C.RATIO], 0)
        return df[{C.TIME, C.HALVING, C.RATIO}.intersection(df.columns)]

    def get_s2f_ratio(self, timeframe='max'):
        """Return the cached stock-to-flow ratio DataFrame
        (time, days-till-halving, ratio), filtered to `timeframe`."""
        df = self.reader.load_csv(self.finder.get_s2f_path())
        filtered = self.reader.data_in_timeframe(
            df, C.TIME, timeframe)[[C.TIME, C.HALVING, C.RATIO]]
        return filtered

    def standardize_s2f_deflection(self, df):
        """Standardize a raw s2f deflection payload (t/v) into the
        cached schema (default value 1 == no deflection)."""
        full_mapping = dict(zip(['t', 'v'], [C.TIME, C.VAL]))
        filename = self.finder.get_s2f_path()
        df = self.standardize(
            df, full_mapping, filename, [C.TIME, C.VAL], 1)
        return df[{C.TIME, C.VAL}.intersection(df.columns)]

    def get_s2f_deflection(self, timeframe='max'):
        """Return the cached s2f deflection DataFrame,
        filtered to `timeframe`."""
        df = self.reader.load_csv(self.finder.get_s2f_path())
        filtered = self.reader.data_in_timeframe(
            df, C.TIME, timeframe)[[C.TIME, C.VAL]]
        return filtered

    def save_s2f(self, **kwargs):
        """Save stock-to-flow data (ratio + deflection).

        Mirrors save_social_sentiment: the two frames are merged
        (outer, on C.TIME) when both are non-empty; if both are empty
        nothing is written and None is returned. Returns the filename
        on success.
        """
        filename = self.finder.get_s2f_path()
        if os.path.exists(filename):
            os.remove(filename)
        rat_df = self.reader.update_df(
            filename, self.get_s2f_ratio(**kwargs), C.TIME, C.DATE_FMT)
        rat_df = rat_df[
            {C.TIME, C.HALVING, C.RATIO}.intersection(rat_df.columns)]
        def_df = self.reader.update_df(
            filename, self.get_s2f_deflection(**kwargs), C.TIME, C.DATE_FMT)
        def_df = def_df[{C.TIME, C.VAL}.intersection(def_df.columns)]
        if rat_df.empty and not def_df.empty:
            df = def_df
        elif not rat_df.empty and def_df.empty:
            df = rat_df
        elif not rat_df.empty and not def_df.empty:
            df = rat_df.merge(def_df, how="outer", on=C.TIME)
        else:
            # both empty: nothing to persist
            return
        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename