def extract_dataset(
        key,
        ticker=None,
        date=None,
        work_dict=None,
        scrub_mode='NO_SORT',
        verbose=False):
    """extract_dataset

    Extract the IEX key data for a ticker from Redis and
    return it as a tuple (status, ``pandas.DataFrame``)

    :param key: IEX dataset key
    :param ticker: string ticker to extract
    :param date: optional - string date to extract
        formatted ``YYYY-MM-DD``
    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    :param verbose: optional - boolean for turning on logging
    """
    if not key or key not in keys:
        log.error(f'unsupported extract key={key} in keys={keys}')
        return None

    label = key
    df_type = keys[key]
    df_str = iex_consts.get_datafeed_str(df_type=df_type)
    latest_close_date = ae_utils.get_last_close_str()
    use_date = date
    if work_dict:
        if not ticker:
            ticker = work_dict.get('ticker', None)
    if not work_dict:
        work_dict = api_requests.get_ds_dict(
            ticker=ticker)
    req = copy.deepcopy(work_dict)
    if not use_date:
        use_date = latest_close_date
    redis_key = f'{ticker}_{use_date}_{key}'
    req['redis_key'] = redis_key
    req['s3_key'] = redis_key
    if verbose:
        log.info(
            f'{label} - {df_str} - '
            f'date={date} '
            f'redis_key={req["redis_key"]} '
            f's3_key={req["s3_key"]} '
            f'{ae_consts.ppj(req)}')
    return extract_utils.perform_extract(
        df_type=df_type,
        df_str=df_str,
        work_dict=req,
        scrub_mode=scrub_mode,
        verbose=verbose)
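# Usage sketch for ``extract_dataset`` (assumes this module is importable
# as ``analysis_engine.iex.extract_df_from_redis`` and that Redis already
# holds a cached IEX dataset for the ticker):
#
#     import analysis_engine.consts as ae_consts
#     import analysis_engine.iex.extract_df_from_redis as iex_extract
#
#     status, df = iex_extract.extract_dataset(
#         key='minute',
#         ticker='SPY')
#     if status == ae_consts.SUCCESS:
#         print(df.tail())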
def get_options(
        ticker,
        contract_type,
        exp_date_str,
        strike=None):
    """get_options

    Returns a dictionary of option chain data on success,
    or an empty list on error.

    :param ticker: ticker to lookup
    :param contract_type: ``C`` for calls or ``P`` for puts; if
        ``strike=None`` then the ``contract_type`` is ignored
    :param exp_date_str: ``YYYY-MM-DD`` expiration date format
    :param strike: optional strike price, ``None`` returns all
        option chains
    """
    log.info(
        'get_options ticker={} '
        'contract={} exp_date={} strike={}'.format(
            ticker,
            contract_type,
            exp_date_str,
            strike))
    response = make_request(
        ticker=ticker,
        exp_date_str=exp_date_str)
    try:
        options_data = extract_options_data(
            response=response,
            contract_type=contract_type,
            strike=strike)
        options_dict = {
            'date': ae_utils.get_last_close_str(),
            'exp_date': None,
            'num_calls': None,
            'num_puts': None,
            'calls': None,
            'puts': None
        }
        if 'expirationDate' in options_data[0]:
            epoch_exp = options_data[0]['expirationDate']
            options_dict['exp_date'] = \
                datetime.datetime.fromtimestamp(
                    epoch_exp).strftime(
                        ae_consts.COMMON_TICK_DATE_FORMAT)
        calls_df = pd.DataFrame(
            options_data[0]['calls'])
        options_dict['num_calls'] = len(
            options_data[0]['calls'])
        options_dict['calls'] = calls_df.to_json(
            orient='records')
        puts_df = pd.DataFrame(
            options_data[0]['puts'])
        options_dict['num_puts'] = len(
            options_data[0]['puts'])
        options_dict['puts'] = puts_df.to_json(
            orient='records')
        return options_dict
    except Exception as e:
        log.error(
            'failed get_options('
            'ticker={}, '
            'contract_type={}, '
            'exp_date_str={}, '
            'strike={}) with ex={}'.format(
                ticker,
                contract_type,
                exp_date_str,
                strike,
                e))
        return []
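# Usage sketch (assumes a valid standard expiration date and that the
# request helpers above can reach the options data source):
#
#     options_dict = get_options(
#         ticker='SPY',
#         contract_type='C',
#         exp_date_str='2019-02-15',
#         strike=None)
#     print(options_dict['num_calls'], options_dict['num_puts'])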
def test_latest(self):
    """test_latest"""
    if ae_consts.ev('INT_TESTS', '0') == '0':
        return
    ticker = 'SPY'
    start_date = ae_utils.get_last_close_str()
    # build dataset cache dictionary
    runner = algo_runner.AlgoRunner(
        ticker=ticker,
        start_date=start_date,
        end_date=None,
        history_loc=self.algo_history_loc,
        algo_config=self.algo_config,
        verbose_algo=True,
        verbose_processor=False,
        verbose_indicators=False)
    req = {
        'ticker': ticker,
        'date_str': start_date,
        'start_row': -200
    }
    df = runner.latest(**req)
    self.assertEqual(
        len(df.index),
        len(runner.get_history().index))
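# This integration test is gated on the INT_TESTS environment variable;
# a sketch for enabling it (the pytest target path is an assumption):
#
#     export INT_TESTS=1
#     python -m pytest -s tests/test_algo_runner.py -k test_latest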
def fetch(
        ticker=None,
        tickers=None,
        fetch_mode=None,
        iex_datasets=None,
        redis_enabled=True,
        redis_address=None,
        redis_db=None,
        redis_password=None,
        redis_expire=None,
        s3_enabled=True,
        s3_address=None,
        s3_bucket=None,
        s3_access_key=None,
        s3_secret_key=None,
        s3_region_name=None,
        s3_secure=False,
        celery_disabled=True,
        broker_url=None,
        result_backend=None,
        label=None,
        verbose=False):
    """fetch

    Fetch all supported datasets for a stock ``ticker`` or
    a list of ``tickers`` and returns a dictionary. Once run,
    the datasets will all be cached in Redis and archived in
    Minio (S3) by default.

    Python example:

    .. code-block:: python

        from analysis_engine.fetch import fetch
        d = fetch(ticker='NFLX')
        print(d)
        for k in d['NFLX']:
            print('dataset key: {}'.format(k))

    By default, it synchronously automates:

        - fetching all datasets
        - caching all datasets in Redis
        - archiving all datasets in Minio (S3)
        - returns all datasets in a single dictionary

    This was created for reducing the amount of typing in
    Jupyter notebooks. It can be set up for use with a
    distributed engine as well with the optional arguments
    depending on your connectivity requirements.

    .. note:: Please ensure Redis and Minio are running
              before trying to extract tickers

    **Stock tickers to fetch**

    :param ticker: single stock ticker/symbol/ETF to fetch
    :param tickers: optional - list of tickers to fetch

    **(Optional) Data sources, datafeeds and datasets to gather**

    :param fetch_mode: data sources - default is ``all`` (both IEX
        and Yahoo), ``iex`` for only IEX, ``yahoo`` for only Yahoo.
    :param iex_datasets: list of strings for gathering specific `IEX
        datasets <https://iextrading.com/developer/docs/#stocks>`__
        which are set as consts: ``analysis_engine.iex.consts.FETCH_*``.

    **(Optional) Redis connectivity arguments**

    :param redis_enabled: bool - toggle for auto-caching all
        datasets in Redis
        (default is ``True``)
    :param redis_address: Redis connection string format: ``host:port``
        (default is ``localhost:6379``)
    :param redis_db: Redis db to use
        (default is ``0``)
    :param redis_password: optional - Redis password
        (default is ``None``)
    :param redis_expire: optional - Redis expire value
        (default is ``None``)

    **(Optional) Minio (S3) connectivity arguments**

    :param s3_enabled: bool - toggle for auto-archiving on Minio (S3)
        (default is ``True``)
    :param s3_address: Minio S3 connection string format: ``host:port``
        (default is ``localhost:9000``)
    :param s3_bucket: S3 Bucket for storing the artifacts
        (default is ``dev``) which should be viewable on a browser:
        http://localhost:9000/minio/dev/
    :param s3_access_key: S3 Access key
        (default is ``trexaccesskey``)
    :param s3_secret_key: S3 Secret key
        (default is ``trex123321``)
    :param s3_region_name: S3 region name
        (default is ``us-east-1``)
    :param s3_secure: Transmit using tls encryption
        (default is ``False``)

    **(Optional) Celery worker broker connectivity arguments**

    :param celery_disabled: bool - toggle synchronous mode or
        publish to an engine connected to the `Celery broker and backend
        <https://github.com/celery/celery#transports-and-backends>`__
        (default is ``True`` - synchronous mode without an engine
        or need for a broker or backend for Celery)
    :param broker_url: Celery broker url
        (default is ``redis://0.0.0.0:6379/13``)
    :param result_backend: Celery backend url
        (default is ``redis://0.0.0.0:6379/14``)
    :param label: tracking log label

    **(Optional) Debugging**

    :param verbose: bool - show fetch warnings
        and other debug logging
        (default is False)

    **Supported environment variables**

    ::

        export REDIS_ADDRESS="localhost:6379"
        export REDIS_DB="0"
        export S3_ADDRESS="localhost:9000"
        export S3_BUCKET="dev"
        export AWS_ACCESS_KEY_ID="trexaccesskey"
        export AWS_SECRET_ACCESS_KEY="trex123321"
        export AWS_DEFAULT_REGION="us-east-1"
        export S3_SECURE="0"
        export WORKER_BROKER_URL="redis://0.0.0.0:6379/13"
        export WORKER_BACKEND_URL="redis://0.0.0.0:6379/14"
    """
    rec = {}
    extract_records = []

    use_tickers = tickers
    if ticker:
        use_tickers = [ticker]
    else:
        if not use_tickers:
            use_tickers = []

    default_iex_datasets = [
        'daily',
        'minute',
        'quote',
        'stats',
        'peers',
        'news',
        'financials',
        'earnings',
        'dividends',
        'company'
    ]

    if not iex_datasets:
        iex_datasets = default_iex_datasets
    if not fetch_mode:
        fetch_mode = 'all'

    if redis_enabled:
        if not redis_address:
            redis_address = os.getenv(
                'REDIS_ADDRESS',
                'localhost:6379')
        if not redis_password:
            redis_password = os.getenv(
                'REDIS_PASSWORD',
                None)
        if not redis_db:
            redis_db = int(os.getenv(
                'REDIS_DB',
                '0'))
        if not redis_expire:
            redis_expire = os.getenv(
                'REDIS_EXPIRE',
                None)
    if s3_enabled:
        if not s3_address:
            s3_address = os.getenv(
                'S3_ADDRESS',
                'localhost:9000')
        if not s3_access_key:
            s3_access_key = os.getenv(
                'AWS_ACCESS_KEY_ID',
                'trexaccesskey')
        if not s3_secret_key:
            s3_secret_key = os.getenv(
                'AWS_SECRET_ACCESS_KEY',
                'trex123321')
        if not s3_region_name:
            s3_region_name = os.getenv(
                'AWS_DEFAULT_REGION',
                'us-east-1')
        if not s3_secure:
            s3_secure = os.getenv(
                'S3_SECURE',
                '0') == '1'
        if not s3_bucket:
            s3_bucket = os.getenv(
                'S3_BUCKET',
                'dev')
    if not broker_url:
        broker_url = os.getenv(
            'WORKER_BROKER_URL',
            'redis://0.0.0.0:6379/13')
    if not result_backend:
        result_backend = os.getenv(
            'WORKER_BACKEND_URL',
            'redis://0.0.0.0:6379/14')

    if not label:
        label = 'get-latest'

    num_tickers = len(use_tickers)
    last_close_str = get_last_close_str()

    if iex_datasets:
        log.info(
            '{} - getting latest for tickers={} '
            'iex={}'.format(
                label,
                num_tickers,
                json.dumps(iex_datasets)))
    else:
        log.info(
            '{} - getting latest for tickers={}'.format(
                label,
                num_tickers))

    for ticker in use_tickers:
        ticker_key = '{}_{}'.format(
            ticker,
            last_close_str)

        fetch_req = build_get_new_pricing_request()
        fetch_req['base_key'] = ticker_key
        fetch_req['celery_disabled'] = celery_disabled
        fetch_req['ticker'] = ticker
        fetch_req['label'] = label
        fetch_req['fetch_mode'] = fetch_mode
        fetch_req['iex_datasets'] = iex_datasets
        fetch_req['s3_enabled'] = s3_enabled
        fetch_req['s3_bucket'] = s3_bucket
        fetch_req['s3_address'] = s3_address
        fetch_req['s3_secure'] = s3_secure
        fetch_req['s3_region_name'] = s3_region_name
        fetch_req['s3_access_key'] = s3_access_key
        fetch_req['s3_secret_key'] = s3_secret_key
        fetch_req['s3_key'] = ticker_key
        fetch_req['redis_enabled'] = redis_enabled
        fetch_req['redis_address'] = redis_address
        fetch_req['redis_password'] = redis_password
        fetch_req['redis_db'] = redis_db
        fetch_req['redis_key'] = ticker_key
        fetch_req['redis_expire'] = redis_expire

        log.info(
            '{} - fetching ticker={} last_close={} '
            'redis_address={} s3_address={}'.format(
                label,
                ticker,
                last_close_str,
                fetch_req['redis_address'],
                fetch_req['s3_address']))

        fetch_res = price_utils.run_get_new_pricing_data(
            work_dict=fetch_req)
        if fetch_res['status'] == SUCCESS:
            log.info(
                '{} - fetched ticker={} '
                'preparing for extraction'.format(
                    label,
                    ticker))
            extract_req = fetch_req
            extract_records.append(extract_req)
        else:
            log.warning(
                '{} - failed getting ticker={} data '
                'status={} err={}'.format(
                    label,
                    ticker,
                    get_status(status=fetch_res['status']),
                    fetch_res['err']))
        # end of if worked or not
    # end for all tickers to fetch

    """
    Extract Datasets
    """

    iex_daily_status = FAILED
    iex_minute_status = FAILED
    iex_quote_status = FAILED
    iex_stats_status = FAILED
    iex_peers_status = FAILED
    iex_news_status = FAILED
    iex_financials_status = FAILED
    iex_earnings_status = FAILED
    iex_dividends_status = FAILED
    iex_company_status = FAILED
    yahoo_news_status = FAILED
    yahoo_options_status = FAILED
    yahoo_pricing_status = FAILED

    iex_daily_df = None
    iex_minute_df = None
    iex_quote_df = None
    iex_stats_df = None
    iex_peers_df = None
    iex_news_df = None
    iex_financials_df = None
    iex_earnings_df = None
    iex_dividends_df = None
    iex_company_df = None
    yahoo_option_calls_df = None
    yahoo_option_puts_df = None
    yahoo_pricing_df = None
    yahoo_news_df = None

    extract_iex = True
    if fetch_mode not in ['all', 'iex']:
        extract_iex = False

    extract_yahoo = True
    if fetch_mode not in ['all', 'yahoo']:
        extract_yahoo = False

    for service_dict in extract_records:
        ticker_data = {}
        ticker = service_dict['ticker']

        extract_req = get_ds_dict(
            ticker=ticker,
            base_key=service_dict.get('base_key', None),
            ds_id=label,
            service_dict=service_dict)

        if 'daily' in iex_datasets or extract_iex:
            iex_daily_status, iex_daily_df = \
                iex_extract_utils.extract_daily_dataset(
                    extract_req)
            if iex_daily_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_daily={}'.format(ticker))
        if 'minute' in iex_datasets or extract_iex:
            iex_minute_status, iex_minute_df = \
                iex_extract_utils.extract_minute_dataset(
                    extract_req)
            if iex_minute_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_minute={}'.format(ticker))
        if 'quote' in iex_datasets or extract_iex:
            iex_quote_status, iex_quote_df = \
                iex_extract_utils.extract_quote_dataset(
                    extract_req)
            if iex_quote_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_quote={}'.format(ticker))
        if 'stats' in iex_datasets or extract_iex:
            iex_stats_status, iex_stats_df = \
                iex_extract_utils.extract_stats_dataset(
                    extract_req)
            if iex_stats_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_stats={}'.format(ticker))
        if 'peers' in iex_datasets or extract_iex:
            iex_peers_status, iex_peers_df = \
                iex_extract_utils.extract_peers_dataset(
                    extract_req)
            if iex_peers_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_peers={}'.format(ticker))
        if 'news' in iex_datasets or extract_iex:
            iex_news_status, iex_news_df = \
                iex_extract_utils.extract_news_dataset(
                    extract_req)
            if iex_news_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_news={}'.format(ticker))
        if 'financials' in iex_datasets or extract_iex:
            iex_financials_status, iex_financials_df = \
                iex_extract_utils.extract_financials_dataset(
                    extract_req)
            if iex_financials_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_financials={}'.format(ticker))
        if 'earnings' in iex_datasets or extract_iex:
            iex_earnings_status, iex_earnings_df = \
                iex_extract_utils.extract_earnings_dataset(
                    extract_req)
            if iex_earnings_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_earnings={}'.format(ticker))
        if 'dividends' in iex_datasets or extract_iex:
            iex_dividends_status, iex_dividends_df = \
                iex_extract_utils.extract_dividends_dataset(
                    extract_req)
            if iex_dividends_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_dividends={}'.format(ticker))
        if 'company' in iex_datasets or extract_iex:
            iex_company_status, iex_company_df = \
                iex_extract_utils.extract_company_dataset(
                    extract_req)
            if iex_company_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_company={}'.format(ticker))
        # end of iex extracts

        if extract_yahoo:
            yahoo_options_status, yahoo_option_calls_df = \
                yahoo_extract_utils.extract_option_calls_dataset(
                    extract_req)
            yahoo_options_status, yahoo_option_puts_df = \
                yahoo_extract_utils.extract_option_puts_dataset(
                    extract_req)
            if yahoo_options_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch yahoo_options={}'.format(ticker))
            yahoo_pricing_status, yahoo_pricing_df = \
                yahoo_extract_utils.extract_pricing_dataset(
                    extract_req)
            if yahoo_pricing_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch yahoo_pricing={}'.format(ticker))
            yahoo_news_status, yahoo_news_df = \
                yahoo_extract_utils.extract_yahoo_news_dataset(
                    extract_req)
            if yahoo_news_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch yahoo_news={}'.format(ticker))
        # end of yahoo extracts

        ticker_data['daily'] = iex_daily_df
        ticker_data['minute'] = iex_minute_df
        ticker_data['quote'] = iex_quote_df
        ticker_data['stats'] = iex_stats_df
        ticker_data['peers'] = iex_peers_df
        ticker_data['news1'] = iex_news_df
        ticker_data['financials'] = iex_financials_df
        ticker_data['earnings'] = iex_earnings_df
        ticker_data['dividends'] = iex_dividends_df
        ticker_data['company'] = iex_company_df
        ticker_data['calls'] = yahoo_option_calls_df
        ticker_data['puts'] = yahoo_option_puts_df
        ticker_data['pricing'] = yahoo_pricing_df
        ticker_data['news'] = yahoo_news_df

        rec[ticker] = ticker_data
    # end of for service_dict in extract_records

    return rec
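# Usage sketch for multiple tickers and a reduced dataset list (assumes
# Redis and Minio are running with the default connectivity values above):
#
#     from analysis_engine.fetch import fetch
#
#     d = fetch(
#         tickers=['SPY', 'NFLX'],
#         iex_datasets=['daily', 'minute', 'news'])
#     for t in d:
#         print(t, sorted(d[t].keys()))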
def setUp(self):
    """setUp"""
    self.ticker = 'SPY'
    self.timeseries = 'day'
    self.trade_strategy = 'count'
    with open('tests/datasets/spy-daily.json', 'r') as f:
        self.daily_dataset = json.loads(f.read())
    self.daily_df = pd.DataFrame(self.daily_dataset)
    self.daily_df['date'] = pd.to_datetime(
        self.daily_df['date'])
    self.start_date_str = self.daily_df['date'].iloc[0].strftime(
        ae_consts.COMMON_TICK_DATE_FORMAT)
    self.end_date_str = self.daily_df['date'].iloc[-1].strftime(
        ae_consts.COMMON_TICK_DATE_FORMAT)
    self.minute_df = pd.DataFrame([])
    self.options_df = pd.DataFrame([])
    self.use_date = '2018-11-05'
    self.dataset_id = '{}_{}'.format(
        self.ticker,
        self.use_date)
    self.datasets = [
        'daily'
    ]
    self.data = {}
    self.data[self.ticker] = [
        {
            'id': self.dataset_id,
            'date': self.use_date,
            'data': {
                'daily': self.daily_df,
                'minute': self.minute_df,
                'options': self.options_df
            }
        }
    ]
    self.balance = 10000.00
    self.last_close_str = ae_utils.get_last_close_str(
        fmt=ae_consts.COMMON_DATE_FORMAT)
    self.output_dir = (
        '/opt/sa/tests/datasets/algo')

    self.willr_close_path = (
        'analysis_engine/mocks/example_indicator_williamsr.py')
    self.willr_open_path = (
        'analysis_engine/mocks/example_indicator_williamsr_open.py')
    self.algo_config_dict = {
        'name': 'test_5_days_ahead',
        'algo_module_path': None,
        'algo_version': 1,
        'timeseries': self.timeseries,
        'trade_strategy': self.trade_strategy,
        'trade_horizon': 5,
        'num_owned': 10,
        'buy_shares': 10,
        'balance': 100000,
        'ticker': 'SPY',
        'verbose': True,
        'verbose_processor': True,
        'positions': {
            'SPY': {
                'shares': 10,
                'buys': [],
                'sells': []
            }
        },
        'buy_rules': {
            'confidence': 75,
            'min_indicators': 3
        },
        'sell_rules': {
            'confidence': 75,
            'min_indicators': 3
        },
        'indicators': [
            {
                'name': 'willr_-70_-30',
                'module_path': self.willr_close_path,
                'category': 'technical',
                'type': 'momentum',
                'uses_data': 'daily',
                'high': 0,
                'low': 0,
                'close': 0,
                'open': 0,
                'willr_value': 0,
                'num_points': 10,
                'buy_below': -70,
                'sell_above': -30,
                'is_buy': False,
                'is_sell': False,
                'verbose': True
            },
            {
                'name': 'willr_-80_-20',
                'module_path': self.willr_close_path,
                'category': 'technical',
                'type': 'momentum',
                'uses_data': 'daily',
                'high': 0,
                'low': 0,
                'close': 0,
                'open': 0,
                'willr_value': 0,
                'num_points': 10,
                'buy_below': -80,
                'sell_above': -20,
                'is_buy': False,
                'is_sell': False
            },
            {
                'name': 'willr_-90_-10',
                'module_path': self.willr_close_path,
                'category': 'technical',
                'type': 'momentum',
                'uses_data': 'daily',
                'high': 0,
                'low': 0,
                'close': 0,
                'open': 0,
                'willr_value': 0,
                'num_points': 10,
                'buy_below': -90,
                'sell_above': -10,
                'is_buy': False,
                'is_sell': False
            },
            {
                'name': 'willr_open_-80_-20',
                'module_path': self.willr_open_path,
                'category': 'technical',
                'type': 'momentum',
                'uses_data': 'daily',
                'high': 0,
                'low': 0,
                'close': 0,
                'open': 0,
                'willr_open_value': 0,
                'num_points': 15,
                'buy_below': -80,
                'sell_above': -20,
                'is_buy': False,
                'is_sell': False
            }
        ],
        'slack': {
            'webhook': None
        }
    }
def fetch_tickers_from_screener(
        url,
        columns=DEFAULT_FINVIZ_COLUMNS,
        as_json=False,
        soup_selector='td.screener-body-table-nw',
        label='fz-screen-converter'):
    """fetch_tickers_from_screener

    Convert all the tickers on a FinViz screener
    url to a ``pandas.DataFrame``. Returns a dictionary
    with a ticker list and DataFrame, or a json-serialized
    DataFrame in a string (by default ``as_json=False`` will
    return a ``pandas.DataFrame`` if the
    ``returned-dictionary['status'] == SUCCESS``).

    Works with urls created on:

    https://finviz.com/screener.ashx

    .. code-block:: python

        import analysis_engine.finviz.fetch_api as fv

        url = (
            'https://finviz.com/screener.ashx?'
            'v=111&'
            'f=cap_midunder,exch_nyse,fa_div_o5,idx_sp500'
            '&ft=4')
        res = fv.fetch_tickers_from_screener(url=url)
        print(res)

    :param url: FinViz screener url
    :param columns: ordered header column as a list of strings
        and corresponds to the header row from the
        FinViz screener table
    :param as_json: optional - boolean for returning the
        DataFrame as a json-serialized string
        (default is ``False``)
    :param soup_selector: ``bs4.BeautifulSoup.selector`` string
        for pulling selected html data
        (by default ``td.screener-body-table-nw``)
    :param label: log tracking label string
    """
    rec = {
        'data': None,
        'created': get_last_close_str(),
        'tickers': []
    }
    res = req_utils.build_result(
        status=NOT_RUN,
        err=None,
        rec=rec)

    try:
        log.info(
            '{} fetching url={}'.format(
                label,
                url))
        response = requests.get(url)
        if response.status_code != requests.codes.ok:
            err = (
                '{} finviz returned non-ok HTTP (200) '
                'status_code={} with text={} for url={}'.format(
                    label,
                    response.status_code,
                    response.text,
                    url))
            log.error(err)
            return req_utils.build_result(
                status=ERR,
                err=err,
                rec=rec)
        # end of checking for a good HTTP response status code

        soup = bs4.BeautifulSoup(
            response.text,
            features='html.parser')
        selected = soup.select(soup_selector)

        log.debug(
            '{} found={} url={}'.format(
                label,
                len(selected),
                url))

        ticker_list = []
        rows = []
        use_columns = columns
        num_columns = len(use_columns)

        new_row = {}
        col_idx = 0
        for idx, node in enumerate(selected):
            if col_idx >= num_columns:
                col_idx = 0
            column_name = use_columns[col_idx]
            test_text = str(node.text).lower().strip()
            col_idx += 1
            if column_name != 'ignore' and (
                    test_text != 'save as portfolio' and
                    test_text != 'export'):
                cur_text = str(node.text).strip()
                if column_name == 'ticker':
                    ticker_list.append(cur_text)
                    new_row[column_name] = cur_text.upper()
                else:
                    new_row[column_name] = cur_text
                # end of filtering bad sections around table

                if len(new_row) >= num_columns:
                    log.debug(
                        '{} adding ticker={}'.format(
                            label,
                            new_row['ticker']))
                    rows.append(new_row)
                    new_row = {}
                    col_idx = 0
                # end of if valid row
            # end if column is valid
        # end of walking through all matched html data on the screener

        log.debug(
            '{} done convert url={} to tickers={} '
            'rows={}'.format(
                label,
                url,
                ticker_list,
                len(rows)))

        df = pd.DataFrame(
            rows)

        log.info(
            '{} fetch done - df={} from url={} with tickers={} '
            'rows={}'.format(
                label,
                len(df.index),
                url,
                ticker_list,
                len(rows)))

        rec['tickers'] = ticker_list
        rec['data'] = df

        res = req_utils.build_result(
            status=SUCCESS,
            err=None,
            rec=rec)
    except Exception as e:
        rec['tickers'] = []
        rec['data'] = None
        err = (
            '{} failed converting screen url={} to list '
            'with ex={}'.format(
                label,
                url,
                e))
        log.error(err)
        res = req_utils.build_result(
            status=EX,
            err=err,
            rec=rec)
    # end of try/ex

    return res
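# Sketch for consuming the returned dictionary (``SUCCESS`` is the same
# status const used to build the result above):
#
#     res = fetch_tickers_from_screener(url=url)
#     if res['status'] == SUCCESS:
#         print(res['rec']['tickers'])
#         print(res['rec']['data'].head())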
def extract_option_calls_dataset(
        ticker=None,
        date=None,
        work_dict=None,
        scrub_mode='sort-by-date',
        verbose=False):
    """extract_option_calls_dataset

    Extract the TD options calls for a ticker and
    return a tuple (status, ``pandas.DataFrame``)

    .. code-block:: python

        import analysis_engine.td.extract_df_from_redis as td_extract

        # extract by historical date is also supported as an arg
        # date='2019-02-15'
        calls_status, calls_df = td_extract.extract_option_calls_dataset(
            ticker='SPY')
        print(calls_df)

    :param ticker: string ticker to extract
    :param date: optional - string date to extract
        formatted ``YYYY-MM-DD``
    :param work_dict: dictionary of args
    :param scrub_mode: optional - string type of scrubbing handler to run
    :param verbose: optional - boolean for turning on logging
    """
    label = 'extract_td_calls'
    latest_close_date = ae_utils.get_last_close_str()
    use_date = date
    if work_dict:
        if not ticker:
            ticker = work_dict.get('ticker', None)
        label = f'{work_dict.get("label", label)}'
    if not use_date:
        use_date = latest_close_date

    ds_id = ticker
    df_type = td_consts.DATAFEED_TD_CALLS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_db = ae_consts.REDIS_DB
    redis_key = f'{ticker}_{use_date}_tdcalls'
    redis_host, redis_port = ae_consts.get_redis_host_and_port(
        req=work_dict)
    redis_password = ae_consts.REDIS_PASSWORD
    s3_key = redis_key

    if work_dict:
        redis_db = work_dict.get(
            'redis_db',
            redis_db)
        redis_password = work_dict.get(
            'redis_password',
            redis_password)
        verbose = work_dict.get(
            'verbose_td',
            verbose)

    if verbose:
        log.info(
            f'{label} - {df_str} - start - redis_key={redis_key} '
            f's3_key={s3_key}')

    exp_date_str = None
    calls_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=redis_password,
            key=redis_key,
            decompress_df=True)

        status = redis_rec['status']
        if verbose:
            log.info(
                f'{label} - {df_str} redis get data key={redis_key} '
                f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            calls_json = None
            if 'tdcalls' in redis_rec['rec']['data']:
                calls_json = redis_rec['rec']['data']['tdcalls']
            elif 'calls' in redis_rec['rec']['data']:
                calls_json = redis_rec['rec']['data']['calls']
            else:
                calls_json = redis_rec['rec']['data']
            if not calls_json:
                return ae_consts.SUCCESS, pd.DataFrame([])
            if verbose:
                log.info(
                    f'{label} - {df_str} redis convert calls to df')
            exp_date_str = None
            try:
                calls_df = pd.read_json(
                    calls_json,
                    orient='records')
                if len(calls_df.index) == 0:
                    return ae_consts.SUCCESS, pd.DataFrame([])
                if 'date' not in calls_df:
                    if verbose:
                        log.error(
                            'failed to find date column in TD calls '
                            f'df={calls_df} from '
                            f'lens={len(calls_df.index)}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                calls_df = calls_df.sort_values(
                    by=[
                        'date',
                        'strike'
                    ])
                """
                for i, r in calls_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in calls_df:
                        calls_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            calls_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = (
                    calls_df['exp_date'].iloc[-1])

                calls_df['date'] = calls_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)

            except Exception as f:
                not_fixed = True
                if (
                        'Can only use .dt accessor with '
                        'datetimelike values') in str(f):
                    try:
                        log.critical(f'fixing dates in {redis_key}')
                        # remove epoch second data and
                        # use only the millisecond date values
                        bad_date = ae_consts.EPOCH_MINIMUM_DATE
                        calls_df['date'][calls_df['date'] < bad_date] = None
                        calls_df = calls_df.dropna(axis=0, how='any')
                        fmt = ae_consts.COMMON_TICK_DATE_FORMAT
                        calls_df['date'] = pd.to_datetime(
                            calls_df['date'],
                            unit='ms').dt.strftime(fmt)
                        not_fixed = False
                    except Exception as g:
                        log.critical(
                            f'failed to parse date column '
                            f'{calls_df["date"]} '
                            f'with dt.strftime ex={f} and EPOCH EX={g}')
                        return ae_consts.SUCCESS, pd.DataFrame([])
                # if able to fix error or not

                if not_fixed:
                    log.debug(
                        f'{label} - {df_str} redis_key={redis_key} '
                        f'no calls df found or ex={f}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                # if unable to fix - return out

                log.error(
                    f'{label} - {df_str} redis_key={redis_key} '
                    f'no calls df found or ex={f}')
                return ae_consts.SUCCESS, pd.DataFrame([])
            # end of try/ex to convert to df

            if verbose:
                log.info(
                    f'{label} - {df_str} redis_key={redis_key} '
                    f'calls={len(calls_df.index)} exp_date={exp_date_str}')
        else:
            if verbose:
                log.info(
                    f'{label} - {df_str} did not find valid redis '
                    f'option calls in redis_key={redis_key} '
                    f'status={ae_consts.get_status(status=status)}')

    except Exception as e:
        if verbose:
            log.error(
                f'{label} - {df_str} - ds_id={ds_id} failed getting option '
                f'calls from redis={redis_host}:{redis_port}@{redis_db} '
                f'key={redis_key} ex={e}')
        return ae_consts.ERR, pd.DataFrame([])
    # end of try/ex extract from redis

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=calls_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
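# Historical extraction sketch (assumes the Redis cache was seeded with
# the '{ticker}_{date}_tdcalls' key format built above):
#
#     calls_status, calls_df = extract_option_calls_dataset(
#         ticker='SPY',
#         date='2019-02-15')
#     if calls_status == ae_consts.SUCCESS:
#         print(calls_df.head())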
def backtest_with_runner():
    """backtest_with_runner

    build and publish a trading history from an algorithm config.

    ::

        backtest_with_runner.py -t TICKER -c ALGO_CONFIG -s START_DATE
        -k S3_KEY -b S3_BUCKET -l
    """
    parser = argparse.ArgumentParser(
        description=(
            'backtest an algorithm and publish '
            'the trading history'))
    parser.add_argument(
        '-t',
        help=('ticker symbol'),
        required=False,
        dest='ticker')
    parser.add_argument(
        '-k',
        help=('s3_key'),
        required=False,
        dest='s3_key')
    parser.add_argument(
        '-b',
        help=('s3_bucket'),
        required=False,
        dest='s3_bucket')
    parser.add_argument(
        '-s',
        help=('start date format YYYY-MM-DD'),
        required=False,
        dest='start_date')
    parser.add_argument(
        '-c',
        help=('algo config file'),
        required=False,
        dest='algo_config')
    parser.add_argument(
        '-l',
        help=(
            'run a backtest with the latest '
            'pricing data'),
        required=False,
        dest='latest',
        action='store_true')
    parser.add_argument(
        '-d',
        help='debug',
        required=False,
        dest='debug',
        action='store_true')
    args = parser.parse_args()

    ticker = 'SPY'
    s3_bucket = 'algohistory'
    s3_key = f'trade_history_{ticker}'
    start_date = '2019-01-01'
    algo_config = '/opt/sa/cfg/default_algo.json'
    latest = False
    show_plot = True
    debug = False

    if args.ticker:
        ticker = args.ticker.upper()
    if args.s3_key:
        s3_key = args.s3_key
    if args.s3_bucket:
        s3_bucket = args.s3_bucket
    if args.start_date:
        start_date = args.start_date
    if args.algo_config:
        algo_config = args.algo_config
    if args.latest:
        latest = True
        start_date = ae_utils.get_last_close_str()
    if args.debug:
        debug = True

    history_loc = f's3://{s3_bucket}/{s3_key}'

    log.info(
        f'building {ticker} trade history '
        f'start_date={start_date} '
        f'config={algo_config} '
        f'history_loc={history_loc}')

    runner = algo_runner.AlgoRunner(
        ticker=ticker,
        start_date=start_date,
        history_loc=history_loc,
        algo_config=algo_config,
        verbose_algo=debug,
        verbose_processor=False,
        verbose_indicators=False)

    trading_history_df = None
    if latest:
        trading_history_df = runner.latest()
        log.info(f'{ticker} latest:')
        print(trading_history_df[['minute', 'close']].tail(5))
        log.info('Other available columns to plot:')
        print(trading_history_df.columns.values)
        if show_plot:
            plot.plot_trading_history(
                title=(
                    f'{ticker} at '
                    f'${trading_history_df["close"].iloc[-1]} '
                    f'at: '
                    f'{trading_history_df["minute"].iloc[-1]}'),
                df=trading_history_df,
                red='high',
                blue='close')
    else:
        runner.start()

    sys.exit(0)
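# Example invocations (a sketch, assuming the S3/Minio history bucket
# exists and the default algo config is installed at /opt/sa/cfg):
#
#     # full backtest from a start date:
#     backtest_with_runner.py -t SPY -c /opt/sa/cfg/default_algo.json \
#         -s 2019-01-01 -b algohistory -k trade_history_SPY
#
#     # run against the latest pricing data and plot it:
#     backtest_with_runner.py -t SPY -c /opt/sa/cfg/default_algo.json -l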
def fetch_new_stock_datasets():
    """fetch_new_stock_datasets

    Collect datasets for a ticker from IEX Cloud or Tradier

    .. warning:: IEX Cloud charges per request. Here are
        example commands to help you monitor your usage
        while handling first time users and automation
        (intraday, daily, and weekly options are supported).

    **Setup**

    ::

        export IEX_TOKEN=YOUR_IEX_CLOUD_TOKEN
        export TD_TOKEN=YOUR_TRADIER_TOKEN

    **Pull Data for a Ticker from IEX and Tradier**

    ::

        fetch -t TICKER

    **Pull from All Supported IEX Feeds**

    ::

        fetch -t TICKER -g iex-all

    **Pull from All Supported Tradier Feeds**

    ::

        fetch -t TICKER -g td

    **Intraday IEX and Tradier Feeds (only minute and news to reduce costs)**

    ::

        fetch -t TICKER -g intra
        # or manually:
        # fetch -t TICKER -g td,iex_min,iex_news

    **Daily IEX Feeds (daily and news)**

    ::

        fetch -t TICKER -g daily
        # or manually:
        # fetch -t TICKER -g iex_day,iex_news

    **Weekly IEX Feeds (company, financials, earnings, dividends, and peers)**

    ::

        fetch -t TICKER -g weekly
        # or manually:
        # fetch -t TICKER -g iex_fin,iex_earn,iex_div,iex_peers,iex_news,
        # iex_comp

    **IEX Minute**

    ::

        fetch -t TICKER -g iex_min

    **IEX News**

    ::

        fetch -t TICKER -g iex_news

    **IEX Daily**

    ::

        fetch -t TICKER -g iex_day

    **IEX Stats**

    ::

        fetch -t TICKER -g iex_stats

    **IEX Peers**

    ::

        fetch -t TICKER -g iex_peers

    **IEX Financials**

    ::

        fetch -t TICKER -g iex_fin

    **IEX Earnings**

    ::

        fetch -t TICKER -g iex_earn

    **IEX Dividends**

    ::

        fetch -t TICKER -g iex_div

    **IEX Quote**

    ::

        fetch -t TICKER -g iex_quote

    **IEX Company**

    ::

        fetch -t TICKER -g iex_comp

    .. note:: This requires the following services are listening on:

        - redis ``localhost:6379``
        - minio ``localhost:9000``
    """
    log.info('start - fetch_new_stock_datasets')

    parser = argparse.ArgumentParser(
        description=(
            'Download and store the latest stock pricing, '
            'news, and options chain data '
            'and store it in Minio (S3) and Redis. '
            'Also includes support for getting FinViz '
            'screener tickers'))
    parser.add_argument(
        '-t',
        help=('ticker'),
        required=False,
        dest='ticker')
    parser.add_argument(
        '-g',
        help=(
            'optional - fetch mode: '
            'initial = default fetch from initial data feeds '
            '(IEX and Tradier), '
            'intra = fetch intraday from IEX and Tradier, '
            'daily or day = fetch daily from IEX, '
            'weekly = fetch weekly from IEX, '
            'all = fetch from all data feeds, '
            'td = fetch from Tradier feeds only, '
            'iex = fetch from IEX Cloud feeds only, '
            'min or minute or iex_min = fetch IEX Cloud intraday '
            'per-minute feed '
            'https://iexcloud.io/docs/api/#historical-prices, '
            'day or daily or iex_day = fetch IEX Cloud daily feed '
            'https://iexcloud.io/docs/api/#historical-prices, '
            'quote or iex_quote = fetch IEX Cloud quotes feed '
            'https://iexcloud.io/docs/api/#quote, '
            'stats or iex_stats = fetch IEX Cloud key stats feed '
            'https://iexcloud.io/docs/api/#key-stats, '
            'peers or iex_peers = fetch from just IEX Cloud peers feed '
            'https://iexcloud.io/docs/api/#peers, '
            'news or iex_news = fetch IEX Cloud news feed '
            'https://iexcloud.io/docs/api/#news, '
            'fin or iex_fin = fetch IEX Cloud financials feed '
            'https://iexcloud.io/docs/api/#financials, '
            'earn or iex_earn = fetch from just IEX Cloud earnings feed '
            'https://iexcloud.io/docs/api/#earnings, '
            'div or iex_div = fetch from just IEX Cloud dividends feed '
            'https://iexcloud.io/docs/api/#dividends, '
            'iex_comp = fetch from just IEX Cloud company feed '
            'https://iexcloud.io/docs/api/#company'),
        required=False,
        dest='fetch_mode')
    parser.add_argument(
        '-i',
        help=(
            'optional - ticker id '
            'not used without a database'),
        required=False,
        dest='ticker_id')
    parser.add_argument(
        '-e',
        help=('optional - options expiration date'),
        required=False,
        dest='exp_date_str')
    parser.add_argument(
        '-l',
        help=('optional - path to the log config file'),
        required=False,
        dest='log_config_path')
    parser.add_argument(
        '-b',
        help=('optional - broker url for Celery'),
        required=False,
        dest='broker_url')
    parser.add_argument(
        '-B',
        help=('optional - backend url for Celery'),
        required=False,
        dest='backend_url')
    parser.add_argument(
        '-k',
        help=('optional - s3 access key'),
        required=False,
        dest='s3_access_key')
    parser.add_argument(
        '-s',
        help=('optional - s3 secret key'),
        required=False,
        dest='s3_secret_key')
    parser.add_argument(
        '-a',
        help=('optional - s3 address format: <host:port>'),
        required=False,
        dest='s3_address')
    parser.add_argument(
        '-S',
        help=('optional - s3 ssl or not'),
        required=False,
        dest='s3_secure')
    parser.add_argument(
        '-u',
        help=('optional - s3 bucket name'),
        required=False,
        dest='s3_bucket_name')
    parser.add_argument(
        '-G',
        help=('optional - s3 region name'),
        required=False,
        dest='s3_region_name')
    parser.add_argument(
        '-p',
        help=('optional - redis_password'),
        required=False,
        dest='redis_password')
    parser.add_argument(
        '-r',
        help=('optional - redis_address format: <host:port>'),
        required=False,
        dest='redis_address')
    parser.add_argument(
        '-n',
        help=('optional - redis and s3 key name'),
        required=False,
        dest='keyname')
    parser.add_argument(
        '-m',
        help=('optional - redis database number (0 by default)'),
        required=False,
        dest='redis_db')
    parser.add_argument(
        '-x',
        help=('optional - redis expiration in seconds'),
        required=False,
        dest='redis_expire')
    parser.add_argument(
        '-z',
        help=('optional - strike price'),
        required=False,
        dest='strike')
    parser.add_argument(
        '-c',
        help=('optional - contract type "C" for calls "P" for puts'),
        required=False,
        dest='contract_type')
    parser.add_argument(
        '-P',
        help=('optional - get pricing data if "1" or disabled if "0"'),
        required=False,
        dest='get_pricing')
    parser.add_argument(
        '-N',
        help=('optional - get news data if "1" or disabled if "0"'),
        required=False,
        dest='get_news')
    parser.add_argument(
        '-O',
        help=('optional - get options data if "1" or disabled if "0"'),
        required=False,
        dest='get_options')
    parser.add_argument(
        '-U',
        help=(
            'optional - s3 enabled for publishing if "1" or '
            'disabled if "0"'),
        required=False,
        dest='s3_enabled')
    parser.add_argument(
        '-R',
        help=(
            'optional - redis enabled for publishing if "1" or '
            'disabled if "0"'),
        required=False,
        dest='redis_enabled')
    parser.add_argument(
        '-A',
        help=(
            'optional - run an analysis '
            'supported modes: scn'),
        required=False,
        dest='analysis_type')
    parser.add_argument(
        '-L',
        help=(
            'optional - screener urls to pull '
            'tickers for analysis'),
        required=False,
        dest='urls')
    parser.add_argument(
        '-Z',
        help=(
            'optional - run with an engine (Celery) instead of the '
            'default offline mode used for local testing and demos'),
        required=False,
        dest='celery_enabled',
        action='store_true')
    parser.add_argument(
        '-F',
        help=(
            'optional - backfill date for filling in '
            'gaps for the IEX Cloud minute dataset '
            'format is YYYY-MM-DD'),
        required=False,
        dest='backfill_date')
    parser.add_argument(
        '-d',
        help=('debug'),
        required=False,
        dest='debug',
        action='store_true')
    args = parser.parse_args()

    run_offline = True
    ticker = ae_consts.TICKER
    ticker_id = ae_consts.TICKER_ID
    fetch_mode = 'initial'
    exp_date_str = ae_consts.NEXT_EXP_STR
    ssl_options = ae_consts.SSL_OPTIONS
    transport_options = ae_consts.TRANSPORT_OPTIONS
    broker_url = ae_consts.WORKER_BROKER_URL
    backend_url = ae_consts.WORKER_BACKEND_URL
    celery_config_module = ae_consts.WORKER_CELERY_CONFIG_MODULE
    include_tasks = ae_consts.INCLUDE_TASKS
    s3_access_key = ae_consts.S3_ACCESS_KEY
    s3_secret_key = ae_consts.S3_SECRET_KEY
    s3_region_name = ae_consts.S3_REGION_NAME
    s3_address = ae_consts.S3_ADDRESS
    s3_secure = ae_consts.S3_SECURE
    s3_bucket_name = ae_consts.S3_BUCKET
    s3_key = ae_consts.S3_KEY
    redis_address = ae_consts.REDIS_ADDRESS
    redis_key = ae_consts.REDIS_KEY
    redis_password = ae_consts.REDIS_PASSWORD
    redis_db = ae_consts.REDIS_DB
    redis_expire = ae_consts.REDIS_EXPIRE
    strike = None
    contract_type = None
    get_pricing = True
    get_news = True
    get_options = True
    s3_enabled = True
    redis_enabled = True
    analysis_type = None
    backfill_date = None
    debug = False

    if args.ticker:
        ticker = args.ticker.upper()
    if args.ticker_id:
        ticker_id = args.ticker_id
    if args.exp_date_str:
        exp_date_str = args.exp_date_str
    if args.broker_url:
        broker_url = args.broker_url
    if args.backend_url:
        backend_url = args.backend_url
    if args.s3_access_key:
        s3_access_key = args.s3_access_key
    if args.s3_secret_key:
        s3_secret_key = args.s3_secret_key
    if args.s3_region_name:
        s3_region_name = args.s3_region_name
    if args.s3_address:
        s3_address = args.s3_address
    if args.s3_secure:
        s3_secure = args.s3_secure
    if args.s3_bucket_name:
        s3_bucket_name = args.s3_bucket_name
    if args.keyname:
        s3_key = args.keyname
        redis_key = args.keyname
    if args.redis_address:
        redis_address = args.redis_address
    if args.redis_password:
        redis_password = args.redis_password
    if args.redis_db:
        redis_db = args.redis_db
    if args.redis_expire:
        redis_expire = args.redis_expire
    if args.strike:
        strike = args.strike
    if args.contract_type:
        contract_type = args.contract_type
    if args.get_pricing:
        get_pricing = args.get_pricing == '1'
    if args.get_news:
        get_news = args.get_news == '1'
    if args.get_options:
        get_options = args.get_options == '1'
    if args.s3_enabled:
        s3_enabled = args.s3_enabled == '1'
    if args.redis_enabled:
        redis_enabled = args.redis_enabled == '1'
    if args.fetch_mode:
        fetch_mode = str(args.fetch_mode).lower()
    if args.analysis_type:
        analysis_type = str(args.analysis_type).lower()
    if args.celery_enabled:
        run_offline = False
    if args.backfill_date:
        backfill_date = args.backfill_date
    if args.debug:
        debug = True

    work = api_requests.build_get_new_pricing_request()

    work['ticker'] = ticker
    work['ticker_id'] = ticker_id
    work['s3_bucket'] = s3_bucket_name
    work['s3_key'] = s3_key
    work['redis_key'] = redis_key
    work['strike'] = strike
    work['contract'] = contract_type
    work['exp_date'] = exp_date_str
    work['s3_access_key'] = s3_access_key
    work['s3_secret_key'] = s3_secret_key
    work['s3_region_name'] = s3_region_name
    work['s3_address'] = s3_address
    work['s3_secure'] = s3_secure
    work['redis_address'] = redis_address
    work['redis_password'] = redis_password
    work['redis_db'] = redis_db
    work['redis_expire'] = redis_expire
    work['get_pricing'] = get_pricing
    work['get_news'] = get_news
    work['get_options'] = get_options
    work['s3_enabled'] = s3_enabled
    work['redis_enabled'] = redis_enabled
    work['fetch_mode'] = fetch_mode
    work['analysis_type'] = analysis_type
    work['iex_datasets'] = iex_consts.DEFAULT_FETCH_DATASETS
    work['backfill_date'] = backfill_date
    work['debug'] = debug
    work['label'] = f'ticker={ticker}'

    if analysis_type == 'scn':
        label = f'screener={work["ticker"]}'
        fv_urls = []
        if args.urls:
            fv_urls = str(args.urls).split('|')
        if len(fv_urls) == 0:
            fv_urls = os.getenv('SCREENER_URLS', '').split('|')
        screener_req = api_requests.build_screener_analysis_request(
            ticker=ticker,
            fv_urls=fv_urls,
            label=label)
        work.update(screener_req)
        start_screener_analysis(
            req=work)
        # end of analysis_type
    else:
        last_close_date = ae_utils.last_close()
        last_close_str = last_close_date.strftime(
            ae_consts.COMMON_DATE_FORMAT)
        cache_base_key = f'{ticker}_{last_close_str}'
        if not args.keyname:
            work['s3_key'] = cache_base_key
            work['redis_key'] = cache_base_key

        path_to_tasks = 'analysis_engine.work_tasks'
        task_name = (
            f'{path_to_tasks}'
            f'.get_new_pricing_data.get_new_pricing_data')
        task_res = None
        if ae_consts.is_celery_disabled() or run_offline:
            work['celery_disabled'] = True
            work['verbose'] = debug
            log.debug(
                f'starting without celery work={ae_consts.ppj(work)} '
                f'offline={run_offline}')
            task_res = task_pricing.get_new_pricing_data(
                work)
            status_str = ae_consts.get_status(status=task_res['status'])

            cur_date = backfill_date
            if not backfill_date:
                cur_date = ae_utils.get_last_close_str()
            redis_arr = work['redis_address'].split(':')
            include_results = ''
            if debug:
                include_results = task_res['rec']
            if task_res['status'] == ae_consts.SUCCESS:
                if task_res['rec']['num_success'] == 0:
                    log.error(
                        f'failed fetching ticker={work["ticker"]} '
                        f'from {fetch_mode} - please check the '
                        'environment variables')
                else:
                    log.info(
                        f'done fetching ticker={work["ticker"]} '
                        f'mode={fetch_mode} '
                        f'status={status_str} '
                        f'err={task_res["err"]} {include_results}')
                    print(
                        'View keys in redis with:\n'
                        f'redis-cli -h {redis_arr[0]} '
                        'keys '
                        f'"{work["ticker"]}_{cur_date}*"')
            elif task_res['status'] == ae_consts.MISSING_TOKEN:
                print(
                    'Set an IEX or Tradier token: '
                    '\n'
                    '  export IEX_TOKEN=YOUR_IEX_TOKEN\n'
                    '  export TD_TOKEN=YOUR_TD_TOKEN\n')
            else:
                log.error(
                    f'done fetching ticker={work["ticker"]} '
                    f'mode={fetch_mode} '
                    f'status={status_str} '
                    f'err={task_res["err"]}')
            # if/else debug
        else:
            log.debug(
                f'connecting to broker={broker_url} '
                f'backend={backend_url}')

            # Get the Celery app
            app = get_celery_app.get_celery_app(
                name=__name__,
                auth_url=broker_url,
                backend_url=backend_url,
                path_to_config_module=celery_config_module,
                ssl_options=ssl_options,
                transport_options=transport_options,
                include_tasks=include_tasks)

            log.debug(
                f'calling task={task_name} - work={ae_consts.ppj(work)}')
            job_id = app.send_task(
                task_name,
                (work,))
            log.debug(
                f'task={task_name} - job_id={job_id}')
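# Distributed-mode sketch: passing -Z publishes the fetch to Celery
# workers instead of running offline (assumes workers are subscribed to
# the broker and backend urls below):
#
#     fetch -t SPY -Z -b redis://0.0.0.0:6379/13 -B redis://0.0.0.0:6379/14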
def publish_trading_history(
        self,
        records_for_history,
        pt_s3_access_key=None,
        pt_s3_secret_key=None,
        pt_s3_address=None,
        pt_s3_region=None,
        pt_s3_bucket=None,
        pt_s3_key=None,
        pt_s3_secure=ae_consts.NOT_SET,
        **kwargs):
    """publish_trading_history

    Helper for publishing a trading history
    to another S3 service like AWS

    :param records_for_history: list of dictionaries
        for the history file
    :param pt_s3_access_key: access key
    :param pt_s3_secret_key: secret
    :param pt_s3_address: address
    :param pt_s3_region: region
    :param pt_s3_bucket: bucket
    :param pt_s3_key: key
    :param pt_s3_secure: secure flag
    :param kwargs: support for keyword arg dict
    """
    use_s3_access_key = self.pt_s3_access_key
    use_s3_secret_key = self.pt_s3_secret_key
    use_s3_address = self.pt_s3_address
    use_s3_region = self.pt_s3_region
    use_s3_bucket = self.pt_s3_bucket
    use_s3_key = self.pt_s3_key
    use_s3_secure = self.pt_s3_secure

    use_s3_enabled = kwargs.get('s3_enabled', True)
    use_redis_enabled = kwargs.get('redis_enabled', False)
    use_redis_address = kwargs.get('redis_address', None)
    use_redis_db = kwargs.get('redis_db', None)
    use_redis_key = kwargs.get('redis_key', None)
    use_redis_password = kwargs.get('redis_password', None)
    use_redis_expire = kwargs.get('redis_expire', None)
    use_redis_serializer = kwargs.get('redis_serializer', 'json')
    use_redis_encoding = kwargs.get('redis_encoding', 'utf-8')
    verbose = kwargs.get('verbose', False)

    if pt_s3_access_key:
        use_s3_access_key = pt_s3_access_key
    if pt_s3_secret_key:
        use_s3_secret_key = pt_s3_secret_key
    if pt_s3_address:
        use_s3_address = pt_s3_address
    if pt_s3_region:
        use_s3_region = pt_s3_region
    if pt_s3_bucket:
        use_s3_bucket = pt_s3_bucket
    if pt_s3_key:
        use_s3_key = pt_s3_key
    if pt_s3_secure != ae_consts.NOT_SET:
        use_s3_secure = pt_s3_secure

    rec = {
        'tickers': self.ticker,
        'version': int(ae_consts.ALGO_HISTORY_VERSION),
        'last_trade_date': ae_utils.get_last_close_str(),
        'algo_config_dict': self.config_dict,
        'algo_name': self.use_name,
        'created': ae_utils.utc_now_str(),
        self.ticker: records_for_history
    }
    num_bytes = len(str(rec))
    num_mb = ae_consts.get_mb(num_bytes)
    msg = (
        f'publish - {self.ticker} - {rec["last_trade_date"]} '
        # f'{use_s3_access_key} with: {use_s3_secret_key} '
        f's3_loc={use_s3_address}/{use_s3_bucket}/{use_s3_key} '
        f'mb={num_mb}MB')
    log.info(msg)
    publish.publish(
        data=rec,
        label='pub',
        df_compress=True,
        compress=False,
        convert_to_dict=False,
        output_file=None,
        redis_enabled=use_redis_enabled,
        redis_key=use_redis_key,
        redis_address=use_redis_address,
        redis_db=use_redis_db,
        redis_password=use_redis_password,
        redis_expire=use_redis_expire,
        redis_serializer=use_redis_serializer,
        redis_encoding=use_redis_encoding,
        s3_enabled=use_s3_enabled,
        s3_key=use_s3_key,
        s3_address=use_s3_address,
        s3_bucket=use_s3_bucket,
        s3_access_key=use_s3_access_key,
        s3_secret_key=use_s3_secret_key,
        s3_region_name=use_s3_region,
        s3_secure=use_s3_secure,
        slack_enabled=False,
        verbose=verbose)
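# Publish sketch (assumes ``algo`` is an instance of this class with the
# pt_s3_* defaults loaded from its config; the endpoint, bucket, and key
# below are placeholders):
#
#     algo.publish_trading_history(
#         records_for_history=history_records,
#         pt_s3_address='s3.us-east-1.amazonaws.com',
#         pt_s3_region='us-east-1',
#         pt_s3_bucket='my-trade-history',
#         pt_s3_key='SPY_latest.json',
#         pt_s3_secure=1)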
def fetch_calls(
        work_dict,
        scrub_mode='sort-by-date'):
    """fetch_calls

    Fetch the Tradier option calls for a ticker and
    return a tuple (status, ``pandas.DataFrame``)

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    datafeed_type = td_consts.DATAFEED_TD_CALLS
    ticker = work_dict.get('ticker', None)
    label = work_dict.get('label', None)
    exp_date = work_dict.get('exp_date', None)

    log.debug(
        f'{label} - call - scrub_mode={scrub_mode} '
        f'args={work_dict} ticker={ticker}')

    exp_date = opt_dates.option_expiration(
        date=exp_date).strftime(
            ae_consts.COMMON_DATE_FORMAT)
    use_url = td_consts.TD_URLS['options'].format(
        ticker,
        exp_date)
    headers = td_consts.get_auth_headers()
    session = requests.Session()
    session.headers = headers
    res = url_helper.url_helper(sess=session).get(
        use_url)

    if res.status_code != requests.codes.OK:
        if res.status_code in [401, 403]:
            log.critical(
                'Please check the TD_TOKEN is correct '
                f'received {res.status_code} during '
                'fetch for: calls')
        else:
            log.info(
                f'failed to get call with response={res} '
                f'code={res.status_code} '
                f'text={res.text}')
        return ae_consts.EMPTY, pd.DataFrame([{}])

    records = json.loads(res.text)
    org_records = records.get(
        'options', {}).get(
            'option', [])

    if len(org_records) == 0:
        log.info(
            'failed to get call records '
            f'text={res.text}')
        return ae_consts.EMPTY, pd.DataFrame([{}])

    options_list = []

    # assumes UTC conversion will work with the system clock
    created_minute = (
        datetime.datetime.utcnow() -
        datetime.timedelta(hours=5)).strftime(
            '%Y-%m-%d %H:%M:00')
    last_close_date = ae_utils.get_last_close_str(
        fmt='%Y-%m-%d %H:%M:00')

    # hit bug where dates were None
    if not last_close_date:
        last_close_date = created_minute

    for node in org_records:
        node['date'] = last_close_date
        node['created'] = created_minute
        node['ticker'] = ticker
        if (
                node['option_type'] == 'call' and
                node['expiration_type'] == 'standard'):
            node['opt_type'] = int(ae_consts.OPTION_CALL)
            node['exp_date'] = node['expiration_date']

            new_node = {}
            for col in td_consts.TD_OPTION_COLUMNS:
                if col in node:
                    if col in td_consts.TD_EPOCH_COLUMNS:
                        # trade_date can be None
                        if node[col] == 0:
                            new_node[col] = None
                        else:
                            new_node[col] = ae_utils.epoch_to_dt(
                                epoch=node[col] / 1000,
                                use_utc=False,
                                convert_to_est=True).strftime(
                                    ae_consts.COMMON_TICK_DATE_FORMAT)
                            """
                            Debug epoch ms converter:
                            """
                            """
                            print('-----------')
                            print(col)
                            print(node[col])
                            print(new_node[col])
                            print('===========')
                            """
                        # if/else valid date
                    else:
                        new_node[col] = node[col]
                    # if date column to convert
                # if column is in the row
            # convert all columns
            options_list.append(new_node)
    # end of records

    full_df = pd.DataFrame(
        options_list).sort_values(
            by=[
                'strike'
            ],
            ascending=True)
    num_chains = len(full_df.index)

    mid_chain_idx = int(num_chains / 2)
    low_idx = int(mid_chain_idx - 20)
    high_idx = int(mid_chain_idx + 30)
    if low_idx < 0:
        low_idx = 0
    if high_idx > num_chains:
        high_idx = num_chains

    df = full_df[low_idx:high_idx].copy().sort_values(
        by=[
            'date',
            'strike'
        ]).reset_index()

    scrubbed_df = scrub_utils.ingress_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=datafeed_type,
        msg_format='df={} date_str={}',
        ds_id=ticker,
        date_str=exp_date,
        df=df)

    return ae_consts.SUCCESS, scrubbed_df
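# Fetch sketch (assumes TD_TOKEN is exported so get_auth_headers() can
# build the Tradier auth headers; 'exp_date' is optional and defaults to
# the next standard expiration):
#
#     status, calls_df = fetch_calls(
#         work_dict={
#             'ticker': 'SPY',
#             'label': 'td-calls'
#         })
#     if status == ae_consts.SUCCESS:
#         print(calls_df.head())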
def get_data_from_yahoo(
        work_dict):
    """get_data_from_yahoo

    Get data from yahoo

    :param work_dict: request dictionary
    """
    label = 'get_data_from_yahoo'

    log.info(f'task - {label} - start work_dict={work_dict}')

    num_news_rec = 0
    num_option_calls = 0
    num_option_puts = 0
    cur_high = -1
    cur_low = -1
    cur_open = -1
    cur_close = -1
    cur_volume = -1

    rec = {
        'pricing': None,
        'options': None,
        'calls': None,
        'puts': None,
        'news': None,
        'exp_date': None,
        'publish_pricing_update': None,
        'date': None,
        'updated': None
    }
    res = {
        'status': NOT_RUN,
        'err': None,
        'rec': rec
    }

    log.error(
        'sorry - yahoo is disabled and '
        'pinance is no longer supported '
        'https://github.com/neberej/pinance')
    return res

    try:
        ticker = work_dict.get('ticker', TICKER)
        exp_date = work_dict.get('exp_date', None)
        cur_strike = work_dict.get('strike', None)
        contract_type = str(work_dict.get('contract', 'C')).upper()
        get_pricing = work_dict.get('get_pricing', True)
        get_news = work_dict.get('get_news', True)
        get_options = work_dict.get('get_options', True)
        orient = work_dict.get('orient', 'records')
        label = work_dict.get('label', label)

        ticker_results = None
        num_news_rec = 0

        use_date = exp_date
        if not exp_date:
            exp_date = opt_dates.option_expiration(date=exp_date)
            use_date = exp_date.strftime('%Y-%m-%d')

        """
        Debug control flags

        Quickly turn specific fetches off:

        get_news = False
        get_pricing = False
        get_options = False
        """
        if get_pricing:
            log.info(f'{label} getting ticker={ticker} pricing')
            ticker_results.get_quotes()
            if ticker_results.quotes_data:
                pricing_dict = ticker_results.quotes_data

                cur_high = pricing_dict.get(
                    'regularMarketDayHigh', None)
                cur_low = pricing_dict.get(
                    'regularMarketDayLow', None)
                cur_open = pricing_dict.get(
                    'regularMarketOpen', None)
                cur_close = pricing_dict.get(
                    'regularMarketPreviousClose', None)
                cur_volume = pricing_dict.get(
                    'regularMarketVolume', None)
                pricing_dict['high'] = cur_high
                pricing_dict['low'] = cur_low
                pricing_dict['open'] = cur_open
                pricing_dict['close'] = cur_close
                pricing_dict['volume'] = cur_volume
                pricing_dict['date'] = get_last_close_str()
                if 'regularMarketTime' in pricing_dict:
                    pricing_dict['market_time'] = \
                        datetime.datetime.fromtimestamp(
                            pricing_dict['regularMarketTime']).strftime(
                                COMMON_TICK_DATE_FORMAT)
                if 'postMarketTime' in pricing_dict:
                    pricing_dict['post_market_time'] = \
                        datetime.datetime.fromtimestamp(
                            pricing_dict['postMarketTime']).strftime(
                                COMMON_TICK_DATE_FORMAT)

                log.info(
                    f'{label} ticker={ticker} converting pricing to '
                    f'df orient={orient}')

                try:
                    rec['pricing'] = pricing_dict
                except Exception as f:
                    rec['pricing'] = '{}'
                    log.info(
                        f'{label} ticker={ticker} failed converting '
                        f'pricing data={ppj(pricing_dict)} to df ex={f}')
                # try/ex

                log.info(
                    f'{label} ticker={ticker} done converting pricing to '
                    f'df orient={orient}')

            else:
                log.error(
                    f'{label} ticker={ticker} '
                    f'missing quotes_data={ticker_results.quotes_data}')
            # end of if ticker_results.quotes_data

            log.info(
                f'{label} ticker={ticker} close={cur_close} '
                f'vol={cur_volume}')
        else:
            log.info(f'{label} skip - getting ticker={ticker} pricing')
        # if get_pricing

        if get_news:
            log.info(f'{label} getting ticker={ticker} news')
            ticker_results.get_news()
            if ticker_results.news_data:
                news_list = None
                try:
                    news_list = ticker_results.news_data
                    log.info(
                        f'{label} ticker={ticker} converting news to '
                        f'df orient={orient}')

                    num_news_rec = len(news_list)

                    rec['news'] = news_list
                except Exception as f:
                    rec['news'] = '{}'
                    log.info(
                        f'{label} ticker={ticker} failed converting news '
                        f'data={news_list} to df ex={f}')
                # try/ex

                log.info(
                    f'{label} ticker={ticker} done converting news to '
                    f'df orient={orient}')
            else:
                log.info(
                    f'{label} ticker={ticker} Yahoo NO '
                    f'news={ticker_results.news_data}')
            # end of if ticker_results.news_data
        else:
            log.info(f'{label} skip - getting ticker={ticker} news')
        # end if get_news

        if get_options:

            get_all_strikes = True
            if get_all_strikes:
                cur_strike = None
            else:
                if cur_close:
                    cur_strike = int(cur_close)
                if not cur_strike:
                    cur_strike = 287

            log.info(
                f'{label} ticker={ticker} num_news={num_news_rec} '
                f'get options close={cur_close} exp_date={use_date} '
                f'contract={contract_type} strike={cur_strike}')

            options_dict = \
                yahoo_get_pricing.get_options(
                    ticker=ticker,
                    exp_date_str=use_date,
                    contract_type=contract_type,
                    strike=cur_strike)

            rec['options'] = '{}'

            try:
                log.info(
                    f'{label} ticker={ticker} converting options to '
                    f'df orient={orient}')

                num_option_calls = options_dict.get(
                    'num_calls', None)
                num_option_puts = options_dict.get(
                    'num_puts', None)
                rec['options'] = {
                    'exp_date': options_dict.get('exp_date', None),
                    'calls': options_dict.get('calls', None),
                    'puts': options_dict.get('puts', None),
                    'num_calls': num_option_calls,
                    'num_puts': num_option_puts
                }
                rec['calls'] = rec['options'].get(
                    'calls', EMPTY_DF_STR)
                rec['puts'] = rec['options'].get(
                    'puts', EMPTY_DF_STR)
            except Exception as f:
                rec['options'] = '{}'
                log.info(
                    f'{label} ticker={ticker} failed converting options '
                    f'data={options_dict} to df ex={f}')
            # try/ex

            log.info(
                f'{label} ticker={ticker} done converting options to '
                f'df orient={orient} num_calls={num_option_calls} '
                f'num_puts={num_option_puts}')
        else:
            log.info(f'{label} skip - getting ticker={ticker} options')
        # end of if get_options

        log.info(
            f'{label} yahoo pricing for ticker={ticker} close={cur_close} '
            f'num_calls={num_option_calls} num_puts={num_option_puts} '
            f'news={num_news_rec}')

        fields_to_upload = [
            'pricing',
            'options',
            'calls',
            'puts',
            'news'
        ]

        for field_name in fields_to_upload:
            upload_and_cache_req = copy.deepcopy(work_dict)
            upload_and_cache_req['celery_disabled'] = True
            upload_and_cache_req['data'] = rec[field_name]
            if not upload_and_cache_req['data']:
                upload_and_cache_req['data'] = '{}'

            if 'redis_key' in work_dict:
                upload_and_cache_req['redis_key'] = f'''{work_dict.get(
                    'redis_key',
                    f'{ticker}_{field_name}')}_{field_name}'''
            if 's3_key' in work_dict:
                upload_and_cache_req['s3_key'] = f'''{work_dict.get(
                    's3_key',
                    f'{ticker}_{field_name}')}_{field_name}'''

            try:
                update_res = publisher.run_publish_pricing_update(
                    work_dict=upload_and_cache_req)
                update_status = update_res.get('status', NOT_SET)
                log.info(
                    f'{label} publish update '
                    f'status={get_status(status=update_status)} '
                    f'data={update_res}')
            except Exception:
                err = (
                    f'{label} - failed to upload YAHOO '
                    f'data={upload_and_cache_req} to '
                    f's3_key={upload_and_cache_req["s3_key"]} and '
                    f'redis_key={upload_and_cache_req["redis_key"]}')
                log.error(err)
            # end of try/ex to upload and cache

            if not rec[field_name]:
                log.debug(
                    f'{label} - ticker={ticker} no data from YAHOO for '
                    f'field_name={field_name}')
        # end of for all fields

        res = build_result.build_result(
            status=SUCCESS,
            err=None,
            rec=rec)
    except Exception as e:
        res = build_result.build_result(
            status=ERR,
            err=(
                'failed - get_data_from_yahoo '
                f'dict={work_dict} with ex={e}'),
            rec=rec)
        log.error(f'{label} - {res["err"]}')
    # end of try/ex

    log.info(
        'task - get_data_from_yahoo done - '
        f'{label} - status={get_status(res["status"])}')

    return res
def latest(self, date_str=None, start_row=-200, extract_iex=True, extract_yahoo=False, extract_td=True, verbose=False, **kwargs): """latest Run the algorithm with the latest pricing data. Also supports running a backtest for a historical date in the pricing history (format ``YYYY-MM-DD``) :param date_str: optional - string start date ``YYYY-MM-DD`` default is the latest close date :param start_row: negative number of rows back from the end of the list in the data default is ``-200`` where this means the algorithm will process the latest 200 rows in the minute dataset :param extract_iex: bool flag for extracting from ``IEX`` :param extract_yahoo: bool flag for extracting from ``Yahoo`` which is disabled as of 1/2019 :param extract_td: bool flag for extracting from ``Tradier`` :param verbose: bool flag for logs :param kwargs: keyword arg dict """ use_date_str = date_str if not use_date_str: use_date_str = ae_utils.get_last_close_str() log.info(f'creating algo') self.algo_obj = base_algo.BaseAlgo( ticker=self.config_dict['ticker'], balance=self.config_dict['balance'], commission=self.config_dict['commission'], name=self.use_name, start_date=self.use_start_date, end_date=self.use_end_date, auto_fill=self.auto_fill, config_dict=self.config_dict, load_from_s3_bucket=self.load_from_s3_bucket, load_from_s3_key=self.load_from_s3_key, load_from_redis_key=self.load_from_redis_key, load_from_file=self.load_from_file, load_compress=self.load_compress, load_publish=self.load_publish, load_config=self.load_config, report_redis_key=self.report_redis_key, report_s3_bucket=self.report_s3_bucket, report_s3_key=self.report_s3_key, report_file=self.report_file, report_compress=self.report_compress, report_publish=self.report_publish, report_config=self.report_config, history_redis_key=self.history_redis_key, history_s3_bucket=self.history_s3_bucket, history_s3_key=self.history_s3_key, history_file=self.history_file, history_compress=self.history_compress, history_publish=self.history_publish, history_config=self.history_config, extract_redis_key=self.extract_redis_key, extract_s3_bucket=self.extract_s3_bucket, extract_s3_key=self.extract_s3_key, extract_file=self.extract_file, extract_save_dir=self.extract_save_dir, extract_compress=self.extract_compress, extract_publish=self.extract_publish, extract_config=self.extract_config, publish_to_slack=self.publish_to_slack, publish_to_s3=self.publish_to_s3, publish_to_redis=self.publish_to_redis, dataset_type=self.dataset_type, serialize_datasets=self.serialize_datasets, compress=self.compress, encoding=self.encoding, redis_enabled=self.redis_enabled, redis_key=self.redis_key, redis_address=self.redis_address, redis_db=self.redis_db, redis_password=self.redis_password, redis_expire=self.redis_expire, redis_serializer=self.redis_serializer, redis_encoding=self.redis_encoding, s3_enabled=self.s3_enabled, s3_key=self.s3_key, s3_address=self.s3_address, s3_bucket=self.s3_bucket, s3_access_key=self.s3_access_key, s3_secret_key=self.s3_secret_key, s3_region_name=self.s3_region_name, s3_secure=self.s3_secure, slack_enabled=self.slack_enabled, slack_code_block=self.slack_code_block, slack_full_width=self.slack_full_width, dataset_publish_extract=self.extract_publish, dataset_publish_history=self.history_publish, dataset_publish_report=self.report_publish, run_on_engine=self.run_on_engine, auth_url=self.broker_url, backend_url=self.backend_url, include_tasks=self.include_tasks, ssl_options=self.ssl_options, transport_options=self.transport_options, 
path_to_config_module=self.path_to_config_module, timeseries=self.timeseries, trade_strategy=self.trade_strategy, verbose=False, raise_on_err=self.raise_on_err) log.info(f'run latest - start') ticker = self.config_dict['ticker'] self.common_fetch_vals['base_key'] = f'{ticker}_{use_date_str}' extract_req = api_requests.get_ds_dict( ticker=ticker, base_key=self.common_fetch_vals['base_key'], ds_id=ticker, service_dict=self.common_fetch_vals) node_date_key = use_date_str.replace(f'{ticker}_', '') req = { 'id': use_date_str, 'ticker': ticker, 'date_key': self.common_fetch_vals['base_key'], 'date': node_date_key, 'req': extract_req } # fetch iex_daily_df = None iex_minute_df = None iex_quote_df = None iex_stats_df = None iex_peers_df = None iex_news_df = None iex_financials_df = None iex_earnings_df = None iex_dividends_df = None iex_company_df = None yahoo_option_calls_df = None yahoo_option_puts_df = None yahoo_pricing_df = None yahoo_news_df = None td_calls_df = None td_puts_df = None node_date_key = req['date'] dataset_node_id = req['id'] dataset_id = dataset_node_id label = (f'ticker={ticker} ' f'date={node_date_key}') if verbose: log.info(f'{label} - extract - start') if 'daily' in self.iex_datasets or extract_iex: iex_daily_status, iex_daily_df = \ iex_extract_utils.extract_daily_dataset( extract_req) if iex_daily_status != ae_consts.SUCCESS: if verbose: log.warning(f'unable to extract iex_daily={ticker}') if 'minute' in self.iex_datasets or extract_iex: iex_minute_status, iex_minute_df = \ iex_extract_utils.extract_minute_dataset( extract_req) if iex_minute_status != ae_consts.SUCCESS: if verbose: log.warning(f'unable to extract iex_minute={ticker}') if 'quote' in self.iex_datasets or extract_iex: iex_quote_status, iex_quote_df = \ iex_extract_utils.extract_quote_dataset( extract_req) if iex_quote_status != ae_consts.SUCCESS: if verbose: log.warning(f'unable to extract iex_quote={ticker}') if 'stats' in self.iex_datasets or extract_iex: iex_stats_status, iex_stats_df = \ iex_extract_utils.extract_stats_dataset( extract_req) if iex_stats_status != ae_consts.SUCCESS: if verbose: log.warning(f'unable to extract iex_stats={ticker}') if 'peers' in self.iex_datasets or extract_iex: iex_peers_status, iex_peers_df = \ iex_extract_utils.extract_peers_dataset( extract_req) if iex_peers_status != ae_consts.SUCCESS: if verbose: log.warning(f'unable to extract iex_peers={ticker}') if 'news' in self.iex_datasets or extract_iex: iex_news_status, iex_news_df = \ iex_extract_utils.extract_news_dataset( extract_req) if iex_news_status != ae_consts.SUCCESS: if verbose: log.warning(f'unable to extract iex_news={ticker}') if 'financials' in self.iex_datasets or extract_iex: iex_financials_status, iex_financials_df = \ iex_extract_utils.extract_financials_dataset( extract_req) if iex_financials_status != ae_consts.SUCCESS: if verbose: log.warning(f'unable to extract iex_financials={ticker}') if 'earnings' in self.iex_datasets or extract_iex: iex_earnings_status, iex_earnings_df = \ iex_extract_utils.extract_earnings_dataset( extract_req) if iex_earnings_status != ae_consts.SUCCESS: if verbose: log.warning(f'unable to extract iex_earnings={ticker}') if 'dividends' in self.iex_datasets or extract_iex: iex_dividends_status, iex_dividends_df = \ iex_extract_utils.extract_dividends_dataset( extract_req) if iex_dividends_status != ae_consts.SUCCESS: if verbose: log.warning(f'unable to extract iex_dividends={ticker}') if 'company' in self.iex_datasets or extract_iex: iex_company_status, iex_company_df = \ 
iex_extract_utils.extract_company_dataset( extract_req) if iex_company_status != ae_consts.SUCCESS: if verbose: log.warning(f'unable to extract iex_company={ticker}') # end of iex extracts if extract_yahoo: yahoo_options_status, yahoo_option_calls_df = \ yahoo_extract_utils.extract_option_calls_dataset( extract_req) yahoo_options_status, yahoo_option_puts_df = \ yahoo_extract_utils.extract_option_puts_dataset( extract_req) if yahoo_options_status != ae_consts.SUCCESS: if verbose: log.warning(f'unable to extract yahoo_options={ticker}') yahoo_pricing_status, yahoo_pricing_df = \ yahoo_extract_utils.extract_pricing_dataset( extract_req) if yahoo_pricing_status != ae_consts.SUCCESS: if verbose: log.warning(f'unable to extract yahoo_pricing={ticker}') yahoo_news_status, yahoo_news_df = \ yahoo_extract_utils.extract_yahoo_news_dataset( extract_req) if yahoo_news_status != ae_consts.SUCCESS: if verbose: log.warning(f'unable to extract yahoo_news={ticker}') # end of yahoo extracts if extract_td: """ Debug by setting: extract_req['verbose_td'] = True """ convert_to_datetime = [ 'date', 'created', 'ask_date', 'bid_date', 'trade_date' ] td_calls_status, td_calls_df = \ td_extract_utils.extract_option_calls_dataset( extract_req) if td_calls_status != ae_consts.SUCCESS: if verbose: log.warning(f'unable to extract tdcalls={ticker}') else: if ae_consts.is_df(df=td_calls_df): for c in convert_to_datetime: if c in td_calls_df: td_calls_df[c] = pd.to_datetime( td_calls_df[c], format=ae_consts.COMMON_TICK_DATE_FORMAT) if 'date' in td_calls_df: td_calls_df = td_calls_df.sort_values( 'date', ascending=True) # end of converting dates td_puts_status, td_puts_df = \ td_extract_utils.extract_option_puts_dataset( extract_req) if td_puts_status != ae_consts.SUCCESS: if verbose: log.warning(f'unable to extract tdputs={ticker}') else: if ae_consts.is_df(df=td_puts_df): for c in convert_to_datetime: if c in td_puts_df: td_puts_df[c] = pd.to_datetime( td_puts_df[c], format=ae_consts.COMMON_TICK_DATE_FORMAT) if 'date' in td_puts_df: td_puts_df = td_puts_df.sort_values( 'date', ascending=True) # end of converting dates # td extracts # map extracted data to DEFAULT_SERIALIZED_DATASETS ticker_data = {} ticker_data['daily'] = iex_daily_df ticker_data['minute'] = iex_minute_df ticker_data['quote'] = iex_quote_df ticker_data['stats'] = iex_stats_df ticker_data['peers'] = iex_peers_df ticker_data['news1'] = iex_news_df ticker_data['financials'] = iex_financials_df ticker_data['earnings'] = iex_earnings_df ticker_data['dividends'] = iex_dividends_df ticker_data['company'] = iex_company_df ticker_data['calls'] = yahoo_option_calls_df ticker_data['puts'] = yahoo_option_puts_df ticker_data['pricing'] = yahoo_pricing_df ticker_data['news'] = yahoo_news_df ticker_data['tdcalls'] = td_calls_df ticker_data['tdputs'] = td_puts_df algo_data_req = { ticker: [{ 'id': dataset_id, # id is currently the cache key in redis 'date': use_date_str, # used to confirm dates in asc order 'data': ticker_data, 'start_row': start_row }] } if verbose: log.info(f'extract - {label} ' f'dataset={len(algo_data_req[ticker])}') # this could be a separate celery task try: if verbose: log.info(f'handle_data START - {label} from ' f'{node_date_key}') self.algo_obj.handle_data(data=algo_data_req) if verbose: log.info(f'handle_data END - {label} from ' f'{node_date_key}') except Exception as e: a_name = self.algo_obj.get_name() a_debug_msg = self.algo_obj.get_debug_msg() if not a_debug_msg: a_debug_msg = 'debug message not set' # a_config_dict = ae_consts.ppj(self.algo_obj.config_dict) msg =
(f'{label} - algo={a_name} ' f'encountered exception in handle_data tickers={ticker} ' f'from {node_date_key} ex={e} ' f'and failed during operation: {a_debug_msg}') log.critical(f'{msg}') # end try/ex log.info(f'run latest - create history') history_ds = self.algo_obj.create_history_dataset() self.history_df = pd.DataFrame(history_ds[ticker]) self.determine_latest_times_in_history() self.num_rows = len(self.history_df.index) if verbose: log.info(self.history_df[['minute', 'close']].tail(5)) log.info(f'run latest minute={self.end_date} - ' f'rows={self.num_rows} - done') return self.get_history()
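For reference, the single dataset node built above and handed to ``handle_data`` has the following shape; this sketch uses placeholder ``None`` frames, and the keys mirror the ``ticker_data`` mapping in the code:

.. code-block:: python

    ticker_data = {
        'daily': None, 'minute': None, 'quote': None, 'stats': None,
        'peers': None, 'news1': None, 'financials': None, 'earnings': None,
        'dividends': None, 'company': None, 'calls': None, 'puts': None,
        'pricing': None, 'news': None, 'tdcalls': None, 'tdputs': None
    }
    algo_data_req = {
        'SPY': [{
            'id': 'SPY_2019-02-15',   # redis cache key
            'date': '2019-02-15',     # used to confirm dates in asc order
            'data': ticker_data,
            'start_row': -200         # process only the latest 200 minute rows
        }]
    }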
else: break fixed_df = pd.DataFrame(new_recs) if 'date' in fixed_df: fixed_df = fixed_df.sort_values(by=['date'], ascending=True).reset_index(drop=True) print(fixed_df) return fixed_df # end of fix_df use_redis_address = ae_consts.REDIS_ADDRESS last_close_str = ae_utils.get_last_close_str(ae_consts.COMMON_DATE_FORMAT) use_date_str = last_close_str src_date = '2019-02-15' dst_date = src_date dst_date = '2019-02-14' tickers = ['SPY'] for ticker in tickers: log.info(f'extracting src df for ticker: {ticker}') res = None # get from a date or the latest if not set if src_date: use_key = f'{ticker}_{src_date}'
def run_algo( ticker=None, tickers=None, algo=None, # optional derived ``analysis_engine.algo.Algo`` instance balance=None, # float starting base capital commission=None, # float for single trade commission for buy or sell start_date=None, # string YYYY-MM-DD HH:MM:SS end_date=None, # string YYYY-MM-DD HH:MM:SS datasets=None, # string list of identifiers num_owned_dict=None, # not supported cache_freq='daily', # 'minute' not supported auto_fill=True, load_config=None, report_config=None, history_config=None, extract_config=None, use_key=None, extract_mode='all', iex_datasets=None, redis_enabled=True, redis_address=None, redis_db=None, redis_password=None, redis_expire=None, redis_key=None, s3_enabled=True, s3_address=None, s3_bucket=None, s3_access_key=None, s3_secret_key=None, s3_region_name=None, s3_secure=False, s3_key=None, celery_disabled=True, broker_url=None, result_backend=None, label=None, name=None, timeseries=None, trade_strategy=None, verbose=False, publish_to_slack=True, publish_to_s3=True, publish_to_redis=True, extract_datasets=None, config_file=None, config_dict=None, version=1, raise_on_err=True, **kwargs): """run_algo Run an algorithm with steps: 1) Extract redis keys between dates 2) Compile a data pipeline dictionary (call it ``data``) 3) Call algorithm's ``myalgo.handle_data(data=data)`` .. note:: If no ``algo`` is set, the ``analysis_engine.algo.BaseAlgo`` algorithm is used. .. note:: Please ensure Redis and Minio are running before trying to extract tickers **Stock tickers to extract** :param ticker: single stock ticker/symbol/ETF to extract :param tickers: optional - list of tickers to extract :param use_key: optional - extract historical key from Redis **Algo Configuration** :param algo: derived instance of ``analysis_engine.algo.Algo`` object :param balance: optional - float balance parameter can also be set on the ``algo`` object if not set on the args :param commission: float for single trade commission for buy or sell. can also be set on the ``algo`` object :param start_date: string ``YYYY-MM-DD_HH:MM:SS`` cache value :param end_date: string ``YYYY-MM-DD_HH:MM:SS`` cache value :param datasets: list of strings that are ``iex`` or ``yahoo`` datasets that are cached. :param cache_freq: optional - depending on if you are running data feeds on a ``daily`` cron (default) vs every ``minute`` (or faster) :param num_owned_dict: not supported yet :param auto_fill: optional - boolean for auto filling buy/sell orders for backtesting (default is ``True``) :param trading_calendar: ``trading_calendar.TradingCalendar`` object, by default ``analysis_engine.calendars.
always_open.AlwaysOpen`` trading calendar # TradingCalendar by ``TFSExchangeCalendar`` :param config_file: path to a json file containing custom algorithm object member values (like indicator configuration and predict future date units ahead for a backtest) :param config_dict: optional - dictionary that can be passed to derived class implementations of: ``def load_from_config(config_dict=config_dict)`` **Timeseries** :param timeseries: optional - string to set ``day`` or ``minute`` backtesting or live trading (default is ``minute``) **Trading Strategy** :param trade_strategy: optional - string to set the type of ``Trading Strategy`` for backtesting or live trading (default is ``count``) **Algorithm Dataset Loading, Extracting, Reporting and Trading History arguments** :param load_config: optional - dictionary for setting member variables to load an algorithm-ready dataset from a file, s3 or redis :param report_config: optional - dictionary for setting member variables to publish an algo ``trading performance report`` to s3, redis, a file or slack :param history_config: optional - dictionary for setting member variables to publish an algo ``trade history`` to s3, redis, a file or slack :param extract_config: optional - dictionary for setting member variables to publish an algo-ready ``extracted dataset`` to s3, redis, a file or slack **(Optional) Data sources, datafeeds and datasets to gather** :param iex_datasets: list of strings for gathering specific `IEX datasets <https://iexcloud.io/>`__ which are set as consts: ``analysis_engine.iex.consts.FETCH_*``. **(Optional) Redis connectivity arguments** :param redis_enabled: bool - toggle for auto-caching all datasets in Redis (default is ``True``) :param redis_address: Redis connection string format is ``host:port`` (default is ``localhost:6379``) :param redis_db: Redis db to use (default is ``0``) :param redis_password: optional - Redis password (default is ``None``) :param redis_expire: optional - Redis expire value (default is ``None``) :param redis_key: optional - redis key not used (default is ``None``) **(Optional) Minio (S3) connectivity arguments** :param s3_enabled: bool - toggle for auto-archiving on Minio (S3) (default is ``True``) :param s3_address: Minio S3 connection string format ``host:port`` (default is ``localhost:9000``) :param s3_bucket: S3 Bucket for storing the artifacts (default is ``dev``) which should be viewable on a browser: http://localhost:9000/minio/dev/ :param s3_access_key: S3 Access key (default is ``trexaccesskey``) :param s3_secret_key: S3 Secret key (default is ``trex123321``) :param s3_region_name: S3 region name (default is ``us-east-1``) :param s3_secure: Transmit using tls encryption (default is ``False``) :param s3_key: optional s3 key not used (default is ``None``) **(Optional) Celery worker broker connectivity arguments** :param celery_disabled: bool - toggle synchronous mode or publish to an engine connected to the `Celery broker and backend <https://github.com/celery/celery#transports-and-backends>`__ (default is ``True`` - synchronous mode without an engine or need for a broker or backend for Celery) :param broker_url: Celery broker url (default is ``redis://0.0.0.0:6379/13``) :param result_backend: Celery backend url (default is ``redis://0.0.0.0:6379/14``) :param label: tracking log label :param publish_to_slack: optional - boolean for publishing to slack (coming soon) :param publish_to_s3: optional - boolean for publishing to s3 (coming soon) :param publish_to_redis: optional - boolean for
publishing to redis (coming soon) **(Optional) Debugging** :param verbose: bool - show extract warnings and other debug logging (default is False) :param raise_on_err: optional - boolean for unittests and developing algorithms with the ``analysis_engine.run_algo.run_algo`` helper. When set to ``True`` exceptions are raised to the calling functions :param kwargs: keyword arguments dictionary """ # dictionary structure with a list sorted on: ascending dates # algo_data_req[ticker][list][dataset] = pd.DataFrame algo_data_req = {} extract_requests = [] return_algo = False # return created algo objects for use by caller rec = {} msg = None use_tickers = tickers use_balance = balance use_commission = commission if ticker: use_tickers = [ticker] else: if not use_tickers: use_tickers = [] # if these are not set as args, but the algo object # has them, use them instead: if algo: if len(use_tickers) == 0: use_tickers = algo.get_tickers() if not use_balance: use_balance = algo.get_balance() if not use_commission: use_commission = algo.get_commission() default_iex_datasets = [ 'daily', 'minute', 'quote', 'stats', 'peers', 'news', 'financials', 'earnings', 'dividends', 'company' ] if not iex_datasets: iex_datasets = default_iex_datasets if redis_enabled: if not redis_address: redis_address = os.getenv('REDIS_ADDRESS', 'localhost:6379') if not redis_password: redis_password = os.getenv('REDIS_PASSWORD', None) if not redis_db: redis_db = int(os.getenv('REDIS_DB', '0')) if not redis_expire: redis_expire = os.getenv('REDIS_EXPIRE', None) if s3_enabled: if not s3_address: s3_address = os.getenv('S3_ADDRESS', 'localhost:9000') if not s3_access_key: s3_access_key = os.getenv('AWS_ACCESS_KEY_ID', 'trexaccesskey') if not s3_secret_key: s3_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'trex123321') if not s3_region_name: s3_region_name = os.getenv('AWS_DEFAULT_REGION', 'us-east-1') if not s3_secure: s3_secure = os.getenv('S3_SECURE', '0') == '1' if not s3_bucket: s3_bucket = os.getenv('S3_BUCKET', 'dev') if not broker_url: broker_url = os.getenv('WORKER_BROKER_URL', 'redis://0.0.0.0:6379/11') if not result_backend: result_backend = os.getenv('WORKER_BACKEND_URL', 'redis://0.0.0.0:6379/12') if not label: label = 'run-algo' num_tickers = len(use_tickers) last_close_str = ae_utils.get_last_close_str() if iex_datasets: if verbose: log.info(f'{label} - tickers={num_tickers} ' f'iex={json.dumps(iex_datasets)}') else: if verbose: log.info(f'{label} - tickers={num_tickers}') ticker_key = use_key if not ticker_key: ticker_key = f'{ticker}_{last_close_str}' if not algo: algo = base_algo.BaseAlgo(ticker=None, tickers=use_tickers, balance=use_balance, commission=use_commission, config_dict=config_dict, name=label, auto_fill=auto_fill, timeseries=timeseries, trade_strategy=trade_strategy, publish_to_slack=publish_to_slack, publish_to_s3=publish_to_s3, publish_to_redis=publish_to_redis, raise_on_err=raise_on_err) return_algo = True # the algo object is stored # in the result at: res['rec']['algo'] if not algo: msg = f'{label} - missing algo object' log.error(msg) return build_result.build_result(status=ae_consts.EMPTY, err=msg, rec=rec) if raise_on_err: log.debug(f'{label} - enabling algo exception raises') algo.raise_on_err = True indicator_datasets = algo.get_indicator_datasets() if len(indicator_datasets) == 0: indicator_datasets = ae_consts.BACKUP_DATASETS log.info(f'using all datasets={indicator_datasets}') verbose_extract = False if config_dict: verbose_extract = config_dict.get('verbose_extract', False)
common_vals = {} common_vals['base_key'] = ticker_key common_vals['celery_disabled'] = celery_disabled common_vals['ticker'] = ticker common_vals['label'] = label common_vals['iex_datasets'] = iex_datasets common_vals['s3_enabled'] = s3_enabled common_vals['s3_bucket'] = s3_bucket common_vals['s3_address'] = s3_address common_vals['s3_secure'] = s3_secure common_vals['s3_region_name'] = s3_region_name common_vals['s3_access_key'] = s3_access_key common_vals['s3_secret_key'] = s3_secret_key common_vals['s3_key'] = ticker_key common_vals['redis_enabled'] = redis_enabled common_vals['redis_address'] = redis_address common_vals['redis_password'] = redis_password common_vals['redis_db'] = redis_db common_vals['redis_key'] = ticker_key common_vals['redis_expire'] = redis_expire use_start_date_str = start_date use_end_date_str = end_date last_close_date = ae_utils.last_close() end_date_val = None cache_freq_fmt = ae_consts.COMMON_TICK_DATE_FORMAT if not use_end_date_str: use_end_date_str = last_close_date.strftime(cache_freq_fmt) end_date_val = ae_utils.get_date_from_str(date_str=use_end_date_str, fmt=cache_freq_fmt) start_date_val = None if not use_start_date_str: start_date_val = end_date_val - datetime.timedelta(days=60) use_start_date_str = start_date_val.strftime(cache_freq_fmt) else: start_date_val = datetime.datetime.strptime( use_start_date_str, ae_consts.COMMON_TICK_DATE_FORMAT) total_dates = (end_date_val - start_date_val).days if end_date_val < start_date_val: msg = ( f'{label} - invalid dates - start_date={start_date_val} is after ' f'end_date={end_date_val}') raise Exception(msg) if verbose: log.info(f'{label} - days={total_dates} ' f'start={use_start_date_str} ' f'end={use_end_date_str} ' f'datasets={indicator_datasets}') for ticker in use_tickers: req = algo_utils.build_algo_request(ticker=ticker, use_key=use_key, start_date=use_start_date_str, end_date=use_end_date_str, datasets=datasets, balance=use_balance, cache_freq=cache_freq, timeseries=timeseries, trade_strategy=trade_strategy, label=label) ticker_key = f'{ticker}_{last_close_str}' common_vals['ticker'] = ticker common_vals['base_key'] = ticker_key common_vals['redis_key'] = ticker_key common_vals['s3_key'] = ticker_key for date_key in req['extract_datasets']: date_req = api_requests.get_ds_dict(ticker=ticker, base_key=date_key, ds_id=label, service_dict=common_vals) node_date_key = date_key.replace(f'{ticker}_', '') extract_requests.append({ 'id': date_key, 'ticker': ticker, 'date_key': date_key, 'date': node_date_key, 'req': date_req }) # end of for all ticker in use_tickers first_extract_date = None last_extract_date = None total_extract_requests = len(extract_requests) cur_idx = 1 for idx, extract_node in enumerate(extract_requests): extract_ticker = extract_node['ticker'] extract_date = extract_node['date'] ds_node_id = extract_node['id'] if not first_extract_date: first_extract_date = extract_date last_extract_date = extract_date perc_progress = ae_consts.get_percent_done( progress=cur_idx, total=total_extract_requests) percent_label = (f'{label} ' f'ticker={extract_ticker} ' f'date={extract_date} ' f'{perc_progress} ' f'{idx}/{total_extract_requests} ' f'{indicator_datasets}') if verbose: log.info(f'extracting - {percent_label}') ticker_bt_data = build_ds_node.build_dataset_node( ticker=extract_ticker, date=extract_date, service_dict=common_vals, datasets=indicator_datasets, log_label=label, verbose=verbose_extract) if ticker not in algo_data_req: algo_data_req[ticker] = [] algo_data_req[ticker].append({ 'id': 
ds_node_id, # id is currently the cache key in redis 'date': extract_date, # used to confirm dates in asc order 'data': ticker_bt_data }) if verbose: log.info(f'extract - {percent_label} ' f'dataset={len(algo_data_req[ticker])}') cur_idx += 1 # end of for service_dict in extract_requests # this could be a separate celery task status = ae_consts.NOT_RUN if len(algo_data_req) == 0: msg = (f'{label} - nothing to test - no data found for ' f'tickers={use_tickers} ' f'between {first_extract_date} and {last_extract_date}') log.info(msg) return build_result.build_result(status=ae_consts.EMPTY, err=msg, rec=rec) # this could be a separate celery task try: if verbose: log.info(f'handle_data START - {percent_label} from ' f'{first_extract_date} to {last_extract_date}') algo.handle_data(data=algo_data_req) if verbose: log.info(f'handle_data END - {percent_label} from ' f'{first_extract_date} to {last_extract_date}') except Exception as e: a_name = algo.get_name() a_debug_msg = algo.get_debug_msg() if not a_debug_msg: a_debug_msg = 'debug message not set' a_config_dict = ae_consts.ppj(algo.config_dict) msg = (f'{percent_label} - algo={a_name} ' f'encountered exception in handle_data tickers={use_tickers} ' f'from {first_extract_date} to {last_extract_date} ex={e} ' f'and failed during operation: {a_debug_msg}') if raise_on_err: if algo: try: ind_obj = \ algo.get_indicator_process_last_indicator() if ind_obj: ind_obj_path = ind_obj.get_path_to_module() ind_obj_config = ae_consts.ppj(ind_obj.get_config()) found_error_hint = False if hasattr(ind_obj.use_df, 'to_json'): if len(ind_obj.use_df.index) == 0: log.critical( f'indicator failure report for ' f'last module: ' f'{ind_obj_path} ' f'indicator={ind_obj.get_name()} ' f'config={ind_obj_config} ' f'dataset={ind_obj.use_df.head(5)} ' f'name_of_dataset={ind_obj.uses_data}') log.critical( '--------------------------------------' '--------------------------------------') log.critical('Please check if this indicator: ' f'{ind_obj_path} ' 'supports Empty Dataframes') log.critical( '--------------------------------------' '--------------------------------------') found_error_hint = True # indicator error hints if not found_error_hint: log.critical( f'indicator failure report for last module: ' f'{ind_obj_path} ' f'indicator={ind_obj.get_name()} ' f'config={ind_obj_config} ' f'dataset={ind_obj.use_df.head(5)} ' f'name_of_dataset={ind_obj.uses_data}') except Exception as f: log.critical(f'failed to pull indicator processor ' f'last indicator for debugging ' f'from ex={e} with parsing ex={f}') # end of ignoring non-supported ways of creating # indicator processors log.error(msg) log.error(f'algo failure report: ' f'algo={a_name} handle_data() ' f'config={a_config_dict} ') log.critical(f'algo failed during operation: {a_debug_msg}') raise e else: log.error(msg) return build_result.build_result(status=ae_consts.ERR, err=msg, rec=rec) # end of try/ex # this could be a separate celery task try: if verbose: log.info(f'get_result START - {percent_label} from ' f'{first_extract_date} to {last_extract_date}') rec = algo.get_result() status = ae_consts.SUCCESS if verbose: log.info(f'get_result END - {percent_label} from ' f'{first_extract_date} to {last_extract_date}') except Exception as e: msg = ( f'{percent_label} - algo={algo.get_name()} encountered exception ' f'in get_result tickers={use_tickers} from ' f'{first_extract_date} to {last_extract_date} ex={e}') if raise_on_err: if algo: log.error(f'algo={algo.get_name()} failed in get_result with ' 
f'debug_msg={algo.get_debug_msg()}') log.error(msg) raise e else: log.error(msg) return build_result.build_result(status=ae_consts.ERR, err=msg, rec=rec) # end of try/ex if return_algo: rec['algo'] = algo return build_result.build_result(status=status, err=msg, rec=rec)
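A minimal sketch of a synchronous backtest with ``run_algo``; the ticker, dates and dollar values are illustrative, and the returned dictionary layout (``status``, ``err`` and ``rec`` with the created algo under ``rec['algo']``) comes from the code above. The import path is an assumption:

.. code-block:: python

    import analysis_engine.consts as ae_consts
    from analysis_engine.run_algo import run_algo

    res = run_algo(
        ticker='SPY',
        balance=10000.00,
        commission=6.00,
        start_date='2019-02-01 09:30:00',
        end_date='2019-02-15 16:00:00',
        raise_on_err=True)
    if res['status'] == ae_consts.SUCCESS:
        algo = res['rec'].get('algo')   # set when run_algo built the algo
        print(f'ran algo={algo.get_name()}')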
def extract(ticker=None, tickers=None, use_key=None, extract_mode='all', iex_datasets=None, redis_enabled=True, redis_address=None, redis_db=None, redis_password=None, redis_expire=None, s3_enabled=True, s3_address=None, s3_bucket=None, s3_access_key=None, s3_secret_key=None, s3_region_name=None, s3_secure=False, celery_disabled=True, broker_url=None, result_backend=None, label=None, verbose=False): """extract Extract all cached datasets for a stock ``ticker`` or a list of ``tickers`` and return a dictionary. Please make sure the datasets are already cached in Redis before running this method. If not, please refer to the ``analysis_engine.fetch.fetch`` function to prepare the datasets on your environment. Python example: .. code-block:: python from analysis_engine.extract import extract d = extract(ticker='NFLX') print(d) for k in d['NFLX']: print('dataset key: {}'.format(k)) This was created for reducing the amount of typing in Jupyter notebooks. It can be set up for use with a distributed engine as well with the optional arguments depending on your connectivity requirements. .. note:: Please ensure Redis and Minio are running before trying to extract tickers **Stock tickers to extract** :param ticker: single stock ticker/symbol/ETF to extract :param tickers: optional - list of tickers to extract :param use_key: optional - extract historical key from Redis usually formatted ``<TICKER>_<date formatted YYYY-MM-DD>`` **(Optional) Data sources, datafeeds and datasets to gather** :param iex_datasets: list of strings for gathering specific `IEX datasets <https://iextrading.com/developer/docs/#stocks>`__ which are set as consts: ``analysis_engine.iex.consts.FETCH_*``. **(Optional) Redis connectivity arguments** :param redis_enabled: bool - toggle for auto-caching all datasets in Redis (default is ``True``) :param redis_address: Redis connection string format: ``host:port`` (default is ``localhost:6379``) :param redis_db: Redis db to use (default is ``0``) :param redis_password: optional - Redis password (default is ``None``) :param redis_expire: optional - Redis expire value (default is ``None``) **(Optional) Minio (S3) connectivity arguments** :param s3_enabled: bool - toggle for auto-archiving on Minio (S3) (default is ``True``) :param s3_address: Minio S3 connection string format: ``host:port`` (default is ``localhost:9000``) :param s3_bucket: S3 Bucket for storing the artifacts (default is ``dev``) which should be viewable on a browser: http://localhost:9000/minio/dev/ :param s3_access_key: S3 Access key (default is ``trexaccesskey``) :param s3_secret_key: S3 Secret key (default is ``trex123321``) :param s3_region_name: S3 region name (default is ``us-east-1``) :param s3_secure: Transmit using tls encryption (default is ``False``) **(Optional) Celery worker broker connectivity arguments** :param celery_disabled: bool - toggle synchronous mode or publish to an engine connected to the `Celery broker and backend <https://github.com/celery/celery#transports-and-backends>`__ (default is ``True`` - synchronous mode without an engine or need for a broker or backend for Celery) :param broker_url: Celery broker url (default is ``redis://0.0.0.0:6379/13``) :param result_backend: Celery backend url (default is ``redis://0.0.0.0:6379/14``) :param label: tracking log label **(Optional) Debugging** :param verbose: bool - show extract warnings and other debug logging (default is False) **Supported environment variables** :: export REDIS_ADDRESS="localhost:6379" export REDIS_DB="0" export
S3_ADDRESS="localhost:9000" export S3_BUCKET="dev" export AWS_ACCESS_KEY_ID="trexaccesskey" export AWS_SECRET_ACCESS_KEY="trex123321" export AWS_DEFAULT_REGION="us-east-1" export S3_SECURE="0" export WORKER_BROKER_URL="redis://0.0.0.0:6379/13" export WORKER_BACKEND_URL="redis://0.0.0.0:6379/14" """ rec = {} extract_requests = [] use_tickers = tickers if ticker: use_tickers = [ticker] else: if not use_tickers: use_tickers = [] default_iex_datasets = [ 'daily', 'minute', 'quote', 'stats', 'peers', 'news', 'financials', 'earnings', 'dividends', 'company' ] if not iex_datasets: iex_datasets = default_iex_datasets if redis_enabled: if not redis_address: redis_address = os.getenv('REDIS_ADDRESS', 'localhost:6379') if not redis_password: redis_password = os.getenv('REDIS_PASSWORD', None) if not redis_db: redis_db = int(os.getenv('REDIS_DB', '0')) if not redis_expire: redis_expire = os.getenv('REDIS_EXPIRE', None) if s3_enabled: if not s3_address: s3_address = os.getenv('S3_ADDRESS', 'localhost:9000') if not s3_access_key: s3_access_key = os.getenv('AWS_ACCESS_KEY_ID', 'trexaccesskey') if not s3_secret_key: s3_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'trex123321') if not s3_region_name: s3_region_name = os.getenv('AWS_DEFAULT_REGION', 'us-east-1') if not s3_secure: s3_secure = os.getenv('S3_SECURE', '0') == '1' if not s3_bucket: s3_bucket = os.getenv('S3_BUCKET', 'dev') if not broker_url: broker_url = os.getenv('WORKER_BROKER_URL', 'redis://0.0.0.0:6379/13') if not result_backend: result_backend = os.getenv('WORKER_BACKEND_URL', 'redis://0.0.0.0:6379/14') if not label: label = 'get-latest' num_tickers = len(use_tickers) last_close_str = ae_utils.get_last_close_str() if iex_datasets: log.info('{} - getting latest for tickers={} ' 'iex={}'.format(label, num_tickers, json.dumps(iex_datasets))) else: log.info('{} - getting latest for tickers={}'.format( label, num_tickers)) ticker_key = use_key if not ticker_key: ticker_key = '{}_{}'.format(ticker, last_close_str) common_vals = {} common_vals['base_key'] = ticker_key common_vals['celery_disabled'] = celery_disabled common_vals['ticker'] = ticker common_vals['label'] = label common_vals['iex_datasets'] = iex_datasets common_vals['s3_enabled'] = s3_enabled common_vals['s3_bucket'] = s3_bucket common_vals['s3_address'] = s3_address common_vals['s3_secure'] = s3_secure common_vals['s3_region_name'] = s3_region_name common_vals['s3_access_key'] = s3_access_key common_vals['s3_secret_key'] = s3_secret_key common_vals['s3_key'] = ticker_key common_vals['redis_enabled'] = redis_enabled common_vals['redis_address'] = redis_address common_vals['redis_password'] = redis_password common_vals['redis_db'] = redis_db common_vals['redis_key'] = ticker_key common_vals['redis_expire'] = redis_expire common_vals['redis_address'] = redis_address common_vals['s3_address'] = s3_address log.info('{} - extract ticker={} last_close={} base_key={} ' 'redis_address={} s3_address={}'.format( label, ticker, last_close_str, common_vals['base_key'], common_vals['redis_address'], common_vals['s3_address'])) """ Extract Datasets """ iex_daily_status = ae_consts.FAILED iex_minute_status = ae_consts.FAILED iex_quote_status = ae_consts.FAILED iex_stats_status = ae_consts.FAILED iex_peers_status = ae_consts.FAILED iex_news_status = ae_consts.FAILED iex_financials_status = ae_consts.FAILED iex_earnings_status = ae_consts.FAILED iex_dividends_status = ae_consts.FAILED iex_company_status = ae_consts.FAILED yahoo_news_status = ae_consts.FAILED yahoo_options_status = ae_consts.FAILED 
yahoo_pricing_status = ae_consts.FAILED td_calls_status = ae_consts.FAILED td_puts_status = ae_consts.FAILED iex_daily_df = None iex_minute_df = None iex_quote_df = None iex_stats_df = None iex_peers_df = None iex_news_df = None iex_financials_df = None iex_earnings_df = None iex_dividends_df = None iex_company_df = None yahoo_option_calls_df = None yahoo_option_puts_df = None yahoo_pricing_df = None yahoo_news_df = None td_calls_df = None td_puts_df = None for ticker in use_tickers: req = api_requests.get_ds_dict(ticker=ticker, base_key=common_vals['base_key'], ds_id=label, service_dict=common_vals) extract_requests.append(req) # end of for all ticker in use_tickers extract_iex = True if extract_mode not in ['all', 'iex']: extract_iex = False extract_yahoo = True if extract_mode not in ['all', 'yahoo']: extract_yahoo = False extract_td = True if extract_mode not in ['all', 'td']: extract_td = False for extract_req in extract_requests: if 'daily' in iex_datasets or extract_iex: iex_daily_status, iex_daily_df = \ iex_extract_utils.extract_daily_dataset( extract_req) if iex_daily_status != ae_consts.SUCCESS: if verbose: log.warning( 'unable to extract iex_daily={}'.format(ticker)) if 'minute' in iex_datasets or extract_iex: iex_minute_status, iex_minute_df = \ iex_extract_utils.extract_minute_dataset( extract_req) if iex_minute_status != ae_consts.SUCCESS: if verbose: log.warning( 'unable to extract iex_minute={}'.format(ticker)) if 'quote' in iex_datasets or extract_iex: iex_quote_status, iex_quote_df = \ iex_extract_utils.extract_quote_dataset( extract_req) if iex_quote_status != ae_consts.SUCCESS: if verbose: log.warning( 'unable to extract iex_quote={}'.format(ticker)) if 'stats' in iex_datasets or extract_iex: iex_stats_status, iex_stats_df = \ iex_extract_utils.extract_stats_dataset( extract_req) if iex_stats_status != ae_consts.SUCCESS: if verbose: log.warning( 'unable to extract iex_stats={}'.format(ticker)) if 'peers' in iex_datasets or extract_iex: iex_peers_status, iex_peers_df = \ iex_extract_utils.extract_peers_dataset( extract_req) if iex_peers_status != ae_consts.SUCCESS: if verbose: log.warning( 'unable to extract iex_peers={}'.format(ticker)) if 'news' in iex_datasets or extract_iex: iex_news_status, iex_news_df = \ iex_extract_utils.extract_news_dataset( extract_req) if iex_news_status != ae_consts.SUCCESS: if verbose: log.warning('unable to extract iex_news={}'.format(ticker)) if 'financials' in iex_datasets or extract_iex: iex_financials_status, iex_financials_df = \ iex_extract_utils.extract_financials_dataset( extract_req) if iex_financials_status != ae_consts.SUCCESS: if verbose: log.warning( 'unable to extract iex_financials={}'.format(ticker)) if 'earnings' in iex_datasets or extract_iex: iex_earnings_status, iex_earnings_df = \ iex_extract_utils.extract_earnings_dataset( extract_req) if iex_earnings_status != ae_consts.SUCCESS: if verbose: log.warning( 'unable to extract iex_earnings={}'.format(ticker)) if 'dividends' in iex_datasets or extract_iex: iex_dividends_status, iex_dividends_df = \ iex_extract_utils.extract_dividends_dataset( extract_req) if iex_dividends_status != ae_consts.SUCCESS: if verbose: log.warning( 'unable to extract iex_dividends={}'.format(ticker)) if 'company' in iex_datasets or extract_iex: iex_company_status, iex_company_df = \ iex_extract_utils.extract_company_dataset( extract_req) if iex_company_status != ae_consts.SUCCESS: if verbose: log.warning( 'unable to extract iex_company={}'.format(ticker)) # end of iex extracts if extract_yahoo:
yahoo_options_status, yahoo_option_calls_df = \ yahoo_extract_utils.extract_option_calls_dataset( extract_req) yahoo_options_status, yahoo_option_puts_df = \ yahoo_extract_utils.extract_option_puts_dataset( extract_req) if yahoo_options_status != ae_consts.SUCCESS: if verbose: log.warning( 'unable to extract yahoo_options={}'.format(ticker)) yahoo_pricing_status, yahoo_pricing_df = \ yahoo_extract_utils.extract_pricing_dataset( extract_req) if yahoo_pricing_status != ae_consts.SUCCESS: if verbose: log.warning( 'unable to extract yahoo_pricing={}'.format(ticker)) yahoo_news_status, yahoo_news_df = \ yahoo_extract_utils.extract_yahoo_news_dataset( extract_req) if yahoo_news_status != ae_consts.SUCCESS: if verbose: log.warning( 'unable to extract yahoo_news={}'.format(ticker)) # end of yahoo extracts if extract_td: td_calls_status, td_calls_df = \ td_extract_utils.extract_option_calls_dataset( extract_req) if td_calls_status != ae_consts.SUCCESS: if verbose: log.warning('unable to extract tdcalls={}'.format(ticker)) td_puts_status, td_puts_df = \ td_extract_utils.extract_option_puts_dataset( extract_req) if td_puts_status != ae_consts.SUCCESS: if verbose: log.warning('unable to extract tdputs={}'.format(ticker)) # td extracts ticker_data = {} ticker_data['daily'] = iex_daily_df ticker_data['minute'] = iex_minute_df ticker_data['quote'] = iex_quote_df ticker_data['stats'] = iex_stats_df ticker_data['peers'] = iex_peers_df ticker_data['news1'] = iex_news_df ticker_data['financials'] = iex_financials_df ticker_data['earnings'] = iex_earnings_df ticker_data['dividends'] = iex_dividends_df ticker_data['company'] = iex_company_df ticker_data['calls'] = yahoo_option_calls_df ticker_data['puts'] = yahoo_option_puts_df ticker_data['pricing'] = yahoo_pricing_df ticker_data['news'] = yahoo_news_df ticker_data['tdcalls'] = td_calls_df ticker_data['tdputs'] = td_puts_df rec[ticker] = ticker_data # end of for service_dict in extract_requests return rec
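After ``extract`` returns, each ticker maps to the ``ticker_data`` dictionary built above. A short sketch that checks which datasets actually came back as DataFrames, reusing the ``ae_consts.is_df`` helper seen elsewhere in this document (import paths are assumptions):

.. code-block:: python

    import analysis_engine.consts as ae_consts
    from analysis_engine.extract import extract

    d = extract(ticker='SPY')
    for ds_name, df in d['SPY'].items():
        if ae_consts.is_df(df=df):
            print(f'{ds_name}: rows={len(df.index)}')
        else:
            print(f'{ds_name}: no cached data')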
def latest(self, date_str=None, start_row=-200, extract_iex=True, extract_yahoo=False, extract_td=True, verbose=False, **kwargs): """latest Run the algorithm with the latest pricing data. Also supports running a backtest for a historical date in the pricing history (format ``YYYY-MM-DD``) :param date_str: optional - string start date ``YYYY-MM-DD`` default is the latest close date :param start_row: negative number of rows back from the end of the list in the data default is ``-200`` where this means the algorithm will process the latest 200 rows in the minute dataset :param extract_iex: bool flag for extracting from ``IEX`` :param extract_yahoo: bool flag for extracting from ``Yahoo`` which is disabled as of 1/2019 :param extract_td: bool flag for extracting from ``Tradier`` :param verbose: bool flag for logs :param kwargs: keyword arg dict """ use_date_str = date_str if not use_date_str: use_date_str = ae_utils.get_last_close_str() log.info('creating algo') self.algo_obj = base_algo.BaseAlgo( ticker=self.config_dict['ticker'], balance=self.config_dict['balance'], commission=self.config_dict['commission'], name=self.use_name, start_date=self.use_start_date, end_date=self.use_end_date, auto_fill=self.auto_fill, config_dict=self.config_dict, load_from_s3_bucket=self.load_from_s3_bucket, load_from_s3_key=self.load_from_s3_key, load_from_redis_key=self.load_from_redis_key, load_from_file=self.load_from_file, load_compress=self.load_compress, load_publish=self.load_publish, load_config=self.load_config, report_redis_key=self.report_redis_key, report_s3_bucket=self.report_s3_bucket, report_s3_key=self.report_s3_key, report_file=self.report_file, report_compress=self.report_compress, report_publish=self.report_publish, report_config=self.report_config, history_redis_key=self.history_redis_key, history_s3_bucket=self.history_s3_bucket, history_s3_key=self.history_s3_key, history_file=self.history_file, history_compress=self.history_compress, history_publish=self.history_publish, history_config=self.history_config, extract_redis_key=self.extract_redis_key, extract_s3_bucket=self.extract_s3_bucket, extract_s3_key=self.extract_s3_key, extract_file=self.extract_file, extract_save_dir=self.extract_save_dir, extract_compress=self.extract_compress, extract_publish=self.extract_publish, extract_config=self.extract_config, publish_to_slack=self.publish_to_slack, publish_to_s3=self.publish_to_s3, publish_to_redis=self.publish_to_redis, dataset_type=self.dataset_type, serialize_datasets=self.serialize_datasets, compress=self.compress, encoding=self.encoding, redis_enabled=self.redis_enabled, redis_key=self.redis_key, redis_address=self.redis_address, redis_db=self.redis_db, redis_password=self.redis_password, redis_expire=self.redis_expire, redis_serializer=self.redis_serializer, redis_encoding=self.redis_encoding, s3_enabled=self.s3_enabled, s3_key=self.s3_key, s3_address=self.s3_address, s3_bucket=self.s3_bucket, s3_access_key=self.s3_access_key, s3_secret_key=self.s3_secret_key, s3_region_name=self.s3_region_name, s3_secure=self.s3_secure, slack_enabled=self.slack_enabled, slack_code_block=self.slack_code_block, slack_full_width=self.slack_full_width, dataset_publish_extract=self.extract_publish, dataset_publish_history=self.history_publish, dataset_publish_report=self.report_publish, run_on_engine=self.run_on_engine, auth_url=self.broker_url, backend_url=self.backend_url, include_tasks=self.include_tasks, ssl_options=self.ssl_options, transport_options=self.transport_options, 
path_to_config_module=self.path_to_config_module, timeseries=self.timeseries, trade_strategy=self.trade_strategy, verbose=False, raise_on_err=self.raise_on_err) log.info('run latest - start') ticker = self.config_dict['ticker'] dataset_id = f'{ticker}_{use_date_str}' self.common_fetch_vals['base_key'] = dataset_id verbose_extract = self.config_dict.get('verbose_extract', False) indicator_datasets = self.algo_obj.get_indicator_datasets() ticker_data = build_dataset_node.build_dataset_node( ticker=ticker, date=use_date_str, datasets=indicator_datasets, service_dict=self.common_fetch_vals, verbose=verbose_extract) algo_data_req = { ticker: [{ 'id': dataset_id, # id is currently the cache key in redis 'date': use_date_str, # used to confirm dates in asc order 'data': ticker_data, 'start_row': start_row }] } if verbose: log.info(f'extract - {dataset_id} ' f'dataset={len(algo_data_req[ticker])}') # this could be a separate celery task try: if verbose: log.info(f'handle_data START - {dataset_id}') self.algo_obj.handle_data(data=algo_data_req) if verbose: log.info(f'handle_data END - {dataset_id}') except Exception as e: a_name = self.algo_obj.get_name() a_debug_msg = self.algo_obj.get_debug_msg() if not a_debug_msg: a_debug_msg = 'debug message not set' # a_config_dict = ae_consts.ppj(self.algo_obj.config_dict) msg = (f'{dataset_id} - algo={a_name} ' f'encountered exception in handle_data tickers={ticker} ' f'from ex={e} ' f'and failed during operation: {a_debug_msg}') log.critical(f'{msg}') # end try/ex log.info('run latest - create history') history_ds = self.algo_obj.create_history_dataset() self.history_df = pd.DataFrame(history_ds[ticker]) self.determine_latest_times_in_history() self.num_rows = len(self.history_df.index) if verbose: log.info(self.history_df[['minute', 'close']].tail(5)) log.info(f'run latest minute={self.end_date} - ' f'rows={self.num_rows} - done') return self.get_history()
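A sketch of replaying a historical close through ``latest``; the runner construction is abbreviated and the ``algo_config`` path is a placeholder:

.. code-block:: python

    # import path is an assumption
    import analysis_engine.algo_runner as algo_runner

    runner = algo_runner.AlgoRunner(
        ticker='SPY',
        algo_config='./cfg/default_algo.json')   # hypothetical config path
    # date_str=None runs the latest close; a YYYY-MM-DD string backtests that day
    df = runner.latest(date_str='2019-02-15', start_row=-200)
    print(df[['minute', 'close']].tail(5))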
def fetch_calls(ticker=None, work_dict=None, scrub_mode='sort-by-date', verbose=False): """fetch_calls Fetch Tradier option calls for a ticker and return a tuple: (status, ``pandas.DataFrame``) .. code-block:: python import analysis_engine.td.fetch_api as td_fetch # Please set the TD_TOKEN environment variable to your token calls_status, calls_df = td_fetch.fetch_calls( ticker='SPY') print(f'Fetched SPY Option Calls from Tradier status={calls_status}:') print(calls_df) :param ticker: string ticker to fetch :param work_dict: dictionary of args used by the automation :param scrub_mode: optional - string type of scrubbing handler to run :param verbose: optional - bool for debugging """ label = 'fetch_calls' datafeed_type = td_consts.DATAFEED_TD_CALLS exp_date = None latest_pricing = {} latest_close = None if work_dict: ticker = work_dict.get('ticker', ticker) label = work_dict.get('label', label) exp_date = work_dict.get('exp_date', exp_date) latest_pricing = work_dict.get('latest_pricing', latest_pricing) latest_close = latest_pricing.get('close', latest_close) log.debug(f'{label} - calls - close={latest_close} ' f'ticker={ticker}') exp_date = opt_dates.option_expiration().strftime( ae_consts.COMMON_DATE_FORMAT) use_url = td_consts.TD_URLS['options'].format(ticker, exp_date) headers = td_consts.get_auth_headers() session = requests.Session() session.headers = headers res = url_helper.url_helper(sess=session).get(use_url) if res.status_code != requests.codes.OK: if res.status_code in [401, 403]: log.critical('Please check the TD_TOKEN is correct ' f'received {res.status_code} during ' 'fetch for: calls') else: log.info(f'failed to get call with response={res} ' f'code={res.status_code} ' f'text={res.text}') return ae_consts.EMPTY, pd.DataFrame([{}]) records = json.loads(res.text) org_records = records.get('options', {}).get('option', []) if len(org_records) == 0: log.info('failed to get call records ' 'text={}'.format(res.text)) return ae_consts.EMPTY, pd.DataFrame([{}]) options_list = [] # assumes UTC conversion will work with the system clock created_minute = ( datetime.datetime.utcnow() - datetime.timedelta(hours=5)).strftime('%Y-%m-%d %H:%M:00') last_close_date = ae_utils.get_last_close_str(fmt='%Y-%m-%d %H:%M:00') # hit bug where dates were None if not last_close_date: last_close_date = created_minute for node in org_records: node['date'] = last_close_date node['created'] = created_minute node['ticker'] = ticker if (node['option_type'] == 'call' and node['expiration_type'] == 'standard' and float(node['bid']) > 0.01): node['opt_type'] = int(ae_consts.OPTION_CALL) node['exp_date'] = node['expiration_date'] new_node = {} for col in td_consts.TD_OPTION_COLUMNS: if col in node: if col in td_consts.TD_EPOCH_COLUMNS: # trade_date can be None if node[col] == 0: new_node[col] = None else: new_node[col] = ae_utils.epoch_to_dt( epoch=node[col] / 1000, use_utc=False, convert_to_est=True).strftime( ae_consts.COMMON_TICK_DATE_FORMAT) """ Debug epoch ms converter: """ """ print('-----------') print(col) print(node[col]) print(new_node[col]) print('===========') """ # if/else valid date else: new_node[col] = node[col] # if date column to convert # if column is in the row # convert all columns options_list.append(new_node) # end of records full_df = pd.DataFrame(options_list).sort_values(by=['strike'], ascending=True) num_chains = len(full_df.index) df = None if latest_close: df_filter = ((full_df['strike'] >= (latest_close - ae_consts.OPTIONS_LOWER_STRIKE)) & (full_df['strike'] <= (latest_close + 
ae_consts.OPTIONS_UPPER_STRIKE))) df = full_df[df_filter].copy().sort_values( by=['date', 'strike']).reset_index() else: mid_chain_idx = int(num_chains / 2) low_idx = int(mid_chain_idx - ae_consts.MAX_OPTIONS_LOWER_STRIKE) high_idx = int(mid_chain_idx + ae_consts.MAX_OPTIONS_UPPER_STRIKE) if low_idx < 0: low_idx = 0 if high_idx > num_chains: high_idx = num_chains df = full_df[low_idx:high_idx].copy().sort_values( by=['date', 'strike']).reset_index() scrubbed_df = scrub_utils.ingress_scrub_dataset( label=label, scrub_mode=scrub_mode, datafeed_type=datafeed_type, msg_format='df={} date_str={}', ds_id=ticker, date_str=exp_date, df=df) return ae_consts.SUCCESS, scrubbed_df
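The strike-window filter above keeps only the chains near the latest close. A self-contained toy version of the same pandas filter, where the ``20.0`` bounds stand in for ``ae_consts.OPTIONS_LOWER_STRIKE`` and ``ae_consts.OPTIONS_UPPER_STRIKE``:

.. code-block:: python

    import pandas as pd

    full_df = pd.DataFrame({'strike': [250.0, 270.0, 287.0, 300.0, 320.0]})
    latest_close = 287.0
    lower_strike = 20.0
    upper_strike = 20.0
    df_filter = (
        (full_df['strike'] >= (latest_close - lower_strike)) &
        (full_df['strike'] <= (latest_close + upper_strike)))
    # keeps the 270.0, 287.0 and 300.0 rows
    print(full_df[df_filter].sort_values(by=['strike']).reset_index(drop=True))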
def setUp( self): """setUp""" self.ticker = 'AAPL' self.last_close_str = get_last_close_str(fmt=COMMON_DATE_FORMAT)
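The fixture values above combine into the ``<TICKER>_<YYYY-MM-DD>`` base key format used for Redis and S3 lookups throughout this codebase; a small sketch (the import paths are assumptions):

.. code-block:: python

    from analysis_engine.consts import COMMON_DATE_FORMAT
    from analysis_engine.utils import get_last_close_str

    ticker = 'AAPL'
    last_close_str = get_last_close_str(fmt=COMMON_DATE_FORMAT)
    base_key = f'{ticker}_{last_close_str}'   # e.g. 'AAPL_2019-02-15'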
def build_dataset_node(ticker, datasets, date=None, service_dict=None, log_label=None, redis_enabled=True, redis_address=None, redis_db=None, redis_password=None, redis_expire=None, redis_key=None, s3_enabled=True, s3_address=None, s3_bucket=None, s3_access_key=None, s3_secret_key=None, s3_region_name=None, s3_secure=False, s3_key=None, verbose=False): """build_dataset_node Helper for building a dictionary of cached datasets from Redis. The datasets should be built from the algorithm's config indicator ``uses_data`` fields, which default to ``minute`` data if not set :param ticker: string ticker :param datasets: list of string dataset names to extract from redis :param date: optional - string datetime formatted ``YYYY-MM-DD`` (default is last trading close date) :param service_dict: optional - dictionary for all service connectivity to Redis and Minio; if not set, the ``s3_*`` and ``redis_*`` arguments will be used to look up data in Redis and Minio **(Optional) Redis connectivity arguments** :param redis_enabled: bool - toggle for auto-caching all datasets in Redis (default is ``True``) :param redis_address: Redis connection string format is ``host:port`` (default is ``localhost:6379``) :param redis_db: Redis db to use (default is ``0``) :param redis_password: optional - Redis password (default is ``None``) :param redis_expire: optional - Redis expire value (default is ``None``) :param redis_key: optional - redis key not used (default is ``None``) :param s3_enabled: bool - toggle for turning on/off Minio or AWS S3 (default is ``True``) :param s3_address: Minio S3 connection string address format is ``host:port`` (default is ``localhost:9000``) :param s3_bucket: S3 Bucket for storing the artifacts (default is ``dev``) which should be viewable on a browser: http://localhost:9000/minio/dev/ :param s3_access_key: S3 Access key (default is ``trexaccesskey``) :param s3_secret_key: S3 Secret key (default is ``trex123321``) :param s3_region_name: S3 region name (default is ``us-east-1``) :param s3_secure: Transmit using tls encryption (default is ``False``) :param s3_key: optional s3 key not used (default is ``None``) **Debugging** :param log_label: optional - log label string :param verbose: optional - flag for debugging (defaults to ``False``) """ label = log_label if not label: label = 'build_bt' if not date: date = ae_utils.get_last_close_str() td_convert_to_datetime = (ae_consts.TRADIER_CONVERT_TO_DATETIME) date_key = f'{ticker}_{date}' base_req = api_requests.get_ds_dict(ticker=ticker, base_key=date_key, ds_id=label, service_dict=service_dict) if not service_dict: base_req['redis_enabled'] = redis_enabled base_req['redis_address'] = redis_address base_req['redis_password'] = redis_password base_req['redis_db'] = redis_db base_req['redis_key'] = date_key base_req['redis_expire'] = redis_expire base_req['s3_enabled'] = s3_enabled base_req['s3_bucket'] = s3_bucket base_req['s3_address'] = s3_address base_req['s3_secure'] = s3_secure base_req['s3_region_name'] = s3_region_name base_req['s3_access_key'] = s3_access_key base_req['s3_secret_key'] = s3_secret_key base_req['s3_key'] = date_key if verbose: log.info(f'extracting {date_key}') """ for showing connectivity args in the logs log.debug( f'bt {date_key} {ae_consts.ppj(base_req)}') """ iex_daily_status = ae_consts.FAILED iex_minute_status = ae_consts.FAILED iex_quote_status = ae_consts.FAILED iex_stats_status = ae_consts.FAILED iex_peers_status = ae_consts.FAILED iex_news_status = ae_consts.FAILED iex_financials_status =
ae_consts.FAILED iex_earnings_status = ae_consts.FAILED iex_dividends_status = ae_consts.FAILED iex_company_status = ae_consts.FAILED td_calls_status = ae_consts.FAILED td_puts_status = ae_consts.FAILED iex_daily_df = None iex_minute_df = None iex_quote_df = None iex_stats_df = None iex_peers_df = None iex_news_df = None iex_financials_df = None iex_earnings_df = None iex_dividends_df = None iex_company_df = None td_calls_df = None td_puts_df = None if 'daily' in datasets: iex_daily_status, iex_daily_df = \ iex_extract_utils.extract_daily_dataset( base_req) if iex_daily_status != ae_consts.SUCCESS: if verbose: log.warn(f'unable to extract iex_daily={ticker}') if 'minute' in datasets: iex_minute_status, iex_minute_df = \ iex_extract_utils.extract_minute_dataset( base_req) if iex_minute_status != ae_consts.SUCCESS: if verbose: log.warn(f'unable to extract iex_minute={ticker}') if 'quote' in datasets: iex_quote_status, iex_quote_df = \ iex_extract_utils.extract_quote_dataset( base_req) if iex_quote_status != ae_consts.SUCCESS: if verbose: log.warn(f'unable to extract iex_quote={ticker}') if 'stats' in datasets: iex_stats_df, iex_stats_df = \ iex_extract_utils.extract_stats_dataset( base_req) if iex_stats_status != ae_consts.SUCCESS: if verbose: log.warn(f'unable to extract iex_stats={ticker}') if 'peers' in datasets: iex_peers_df, iex_peers_df = \ iex_extract_utils.extract_peers_dataset( base_req) if iex_peers_status != ae_consts.SUCCESS: if verbose: log.warn(f'unable to extract iex_peers={ticker}') if 'news' in datasets: iex_news_status, iex_news_df = \ iex_extract_utils.extract_news_dataset( base_req) if iex_news_status != ae_consts.SUCCESS: if verbose: log.warn(f'unable to extract iex_news={ticker}') if 'financials' in datasets: iex_financials_status, iex_financials_df = \ iex_extract_utils.extract_financials_dataset( base_req) if iex_financials_status != ae_consts.SUCCESS: if verbose: log.warn(f'unable to extract iex_financials={ticker}') if 'earnings' in datasets: iex_earnings_status, iex_earnings_df = \ iex_extract_utils.extract_earnings_dataset( base_req) if iex_earnings_status != ae_consts.SUCCESS: if verbose: log.warn(f'unable to extract iex_earnings={ticker}') if 'dividends' in datasets: iex_dividends_status, iex_dividends_df = \ iex_extract_utils.extract_dividends_dataset( base_req) if iex_dividends_status != ae_consts.SUCCESS: if verbose: log.warn(f'unable to extract iex_dividends={ticker}') if 'company' in datasets: iex_company_status, iex_company_df = \ iex_extract_utils.extract_company_dataset( base_req) if iex_company_status != ae_consts.SUCCESS: if verbose: log.warn(f'unable to extract iex_company={ticker}') # end of iex extracts """ Yahoo disabled on Jan 2019 yahoo_news_status = ae_consts.FAILED yahoo_options_status = ae_consts.FAILED yahoo_pricing_status = ae_consts.FAILED yahoo_option_calls_df = None yahoo_option_puts_df = None yahoo_pricing_df = None yahoo_news_df = None if 'options' in datasets: yahoo_options_status, yahoo_option_calls_df = \ yahoo_extract_utils.extract_option_calls_dataset( base_req) yahoo_options_status, yahoo_option_puts_df = \ yahoo_extract_utils.extract_option_puts_dataset( base_req) if yahoo_options_status != ae_consts.SUCCESS: if verbose: log.warn(f'unable to extract yahoo_options={ticker}') if 'pricing' in datasets: yahoo_pricing_status, yahoo_pricing_df = \ yahoo_extract_utils.extract_pricing_dataset( base_req) if yahoo_pricing_status != ae_consts.SUCCESS: if verbose: log.warn(f'unable to extract yahoo_pricing={ticker}') if 'news' in datasets: 
yahoo_news_status, yahoo_news_df = \ yahoo_extract_utils.extract_yahoo_news_dataset( base_req) if yahoo_news_status != ae_consts.SUCCESS: if verbose: log.warn(f'unable to extract yahoo_news={ticker}') # end of yahoo extracts """ """ Tradier Extraction Debug by setting: base_req['verbose_td'] = True """ if ('calls' in datasets or 'tdcalls' in datasets): td_calls_status, td_calls_df = \ td_extract_utils.extract_option_calls_dataset( base_req) if td_calls_status != ae_consts.SUCCESS: if verbose: log.warn(f'unable to extract tdcalls={ticker}') else: if ae_consts.is_df(df=td_calls_df): for c in td_convert_to_datetime: if c in td_calls_df: td_calls_df[c] = pd.to_datetime( td_calls_df[c], format=ae_consts.COMMON_TICK_DATE_FORMAT) if 'date' in td_calls_df: td_calls_df.sort_values('date', ascending=True) # end of converting dates # end of Tradier calls extraction if ('puts' in datasets or 'tdputs' in datasets): td_puts_status, td_puts_df = \ td_extract_utils.extract_option_puts_dataset( base_req) if td_puts_status != ae_consts.SUCCESS: if verbose: log.warn(f'unable to extract tdputs={ticker}') else: if ae_consts.is_df(df=td_puts_df): for c in td_convert_to_datetime: if c in td_puts_df: td_puts_df[c] = pd.to_datetime( td_puts_df[c], format=ae_consts.COMMON_TICK_DATE_FORMAT) if 'date' in td_puts_df: td_puts_df.sort_values('date', ascending=True) # end of converting dates # end of Tradier puts extraction ticker_data = { 'daily': iex_daily_df, 'minute': iex_minute_df, 'quote': iex_quote_df, 'stats': iex_stats_df, 'peers': iex_peers_df, 'news1': iex_news_df, 'financials': iex_financials_df, 'earnings': iex_earnings_df, 'dividends': iex_dividends_df, 'company': iex_company_df, 'tdcalls': td_calls_df, 'tdputs': td_puts_df, 'calls': None, # yahoo - here for legacy 'news': None, # yahoo - here for legacy 'pricing': None, # yahoo - here for legacy 'puts': None # yahoo - here for legacy } return ticker_data
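# A minimal usage sketch for build_dataset_node, assuming Redis and
# Minio are already running and reachable with the documented default
# connectivity values (or the matching environment variables). The
# ticker and dataset names below are illustrative.
node = build_dataset_node(
    ticker='SPY',
    datasets=['daily', 'minute', 'tdcalls', 'tdputs'],
    verbose=True)
for ds_name in ['daily', 'minute', 'tdcalls', 'tdputs']:
    df = node.get(ds_name, None)
    if df is not None:
        print(f'{ds_name} rows={len(df.index)}')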
def get_ds_dict(
        ticker,
        base_key=None,
        ds_id=None,
        label=None,
        service_dict=None):
    """get_ds_dict

    Get a dictionary with all cache keys for a ticker and return
    the dictionary. Use this method to decouple your apps from the
    underlying cache key implementations (if you do not need them).

    :param ticker: ticker
    :param base_key: optional - base key that is prepended to all
        cache keys
    :param ds_id: optional - dataset id (useful for external
        database ids)
    :param label: optional - tracking label in the logs
    :param service_dict: optional - parent callers and Celery tasks
        can use this dictionary to seed the common service routes
        and endpoints. Refer to
        ``analysis_engine.consts.SERVICE_VALS`` for the keys this
        helper automatically copies over.
    """
    if not ticker:
        raise Exception('please pass in a ticker')

    use_base_key = base_key
    if not use_base_key:
        use_base_key = '{}_{}'.format(
            ticker,
            ae_utils.get_last_close_str(
                fmt=ae_consts.COMMON_DATE_FORMAT))

    date_str = ae_utils.utc_date_str(
        fmt=ae_consts.COMMON_DATE_FORMAT)
    now_str = ae_utils.utc_now_str(
        fmt=ae_consts.COMMON_TICK_DATE_FORMAT)

    daily_redis_key = '{}_{}'.format(
        use_base_key,
        ae_consts.DAILY_S3_BUCKET_NAME)
    minute_redis_key = '{}_{}'.format(
        use_base_key,
        ae_consts.MINUTE_S3_BUCKET_NAME)
    quote_redis_key = '{}_{}'.format(
        use_base_key,
        ae_consts.QUOTE_S3_BUCKET_NAME)
    stats_redis_key = '{}_{}'.format(
        use_base_key,
        ae_consts.STATS_S3_BUCKET_NAME)
    peers_redis_key = '{}_{}'.format(
        use_base_key,
        ae_consts.PEERS_S3_BUCKET_NAME)
    news_iex_redis_key = '{}_{}1'.format(
        use_base_key,
        ae_consts.NEWS_S3_BUCKET_NAME)
    financials_redis_key = '{}_{}'.format(
        use_base_key,
        ae_consts.FINANCIALS_S3_BUCKET_NAME)
    earnings_redis_key = '{}_{}'.format(
        use_base_key,
        ae_consts.EARNINGS_S3_BUCKET_NAME)
    dividends_redis_key = '{}_{}'.format(
        use_base_key,
        ae_consts.DIVIDENDS_S3_BUCKET_NAME)
    company_redis_key = '{}_{}'.format(
        use_base_key,
        ae_consts.COMPANY_S3_BUCKET_NAME)
    options_yahoo_redis_key = '{}_{}'.format(
        use_base_key,
        ae_consts.OPTIONS_S3_BUCKET_NAME)
    call_options_yahoo_redis_key = '{}_calls'.format(
        use_base_key)
    put_options_yahoo_redis_key = '{}_puts'.format(
        use_base_key)
    pricing_yahoo_redis_key = '{}_{}'.format(
        use_base_key,
        ae_consts.PRICING_S3_BUCKET_NAME)
    news_yahoo_redis_key = '{}_{}'.format(
        use_base_key,
        ae_consts.NEWS_S3_BUCKET_NAME)
    call_options_td_redis_key = '{}_tdcalls'.format(
        use_base_key)
    put_options_td_redis_key = '{}_tdputs'.format(
        use_base_key)

    ds_cache_dict = {
        'daily': daily_redis_key,
        'minute': minute_redis_key,
        'quote': quote_redis_key,
        'stats': stats_redis_key,
        'peers': peers_redis_key,
        'news1': news_iex_redis_key,
        'financials': financials_redis_key,
        'earnings': earnings_redis_key,
        'dividends': dividends_redis_key,
        'company': company_redis_key,
        'options': options_yahoo_redis_key,
        'calls': call_options_yahoo_redis_key,
        'puts': put_options_yahoo_redis_key,
        'pricing': pricing_yahoo_redis_key,
        'news': news_yahoo_redis_key,
        'tdcalls': call_options_td_redis_key,
        'tdputs': put_options_td_redis_key,
        'ticker': ticker,
        'ds_id': ds_id,
        'label': label,
        'created': now_str,
        'date': date_str,
        'manifest_key': use_base_key,
        'version': ae_consts.CACHE_DICT_VERSION
    }

    # set keys/values for redis/minio from the
    # service_dict - helper method for
    # launching job chains
    if service_dict:
        for k in ae_consts.SERVICE_VALS:
            ds_cache_dict[k] = service_dict[k]

    return ds_cache_dict
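# A minimal sketch of get_ds_dict, assuming the default base key built
# from the ticker and the last trading close date. The printed values
# are the derived cache keys; the ticker and label are illustrative.
cache_keys = get_ds_dict(
    ticker='SPY',
    label='demo')
print(cache_keys['manifest_key'])  # e.g. SPY_<last-close-date>
print(cache_keys['tdcalls'])       # e.g. SPY_<last-close-date>_tdcalls
print(cache_keys['created'])       # utc timestamp for this dictionary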
def extract_option_puts_dataset(
        ticker=None,
        date=None,
        work_dict=None,
        scrub_mode='sort-by-date',
        verbose=False):
    """extract_option_puts_dataset

    Extract the TD options puts for a ticker and
    return a tuple (status, ``pandas.Dataframe``)

    .. code-block:: python

        import analysis_engine.td.extract_df_from_redis as td_extract

        # extracting by a historical date is also supported as an arg
        # date='2019-02-15'
        puts_status, puts_df = td_extract.extract_option_puts_dataset(
            ticker='SPY')
        print(puts_df)

    :param ticker: string ticker to extract
    :param date: optional - string date to extract
        formatted ``YYYY-MM-DD``
    :param work_dict: dictionary of args
    :param scrub_mode: optional - string type of scrubbing handler
        to run
    :param verbose: optional - boolean for turning on logging
    """
    label = 'extract_td_puts'
    latest_close_date = ae_utils.get_last_close_str()
    use_date = date
    if work_dict:
        if not ticker:
            ticker = work_dict.get('ticker', None)
        label = work_dict.get('label', label)
    if not use_date:
        use_date = latest_close_date

    ds_id = ticker
    df_type = td_consts.DATAFEED_TD_PUTS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_db = ae_consts.REDIS_DB
    redis_key = f'{ticker}_{use_date}_tdputs'
    redis_host, redis_port = ae_consts.get_redis_host_and_port(
        req=work_dict)
    redis_password = ae_consts.REDIS_PASSWORD
    s3_key = redis_key

    if work_dict:
        redis_db = work_dict.get('redis_db', redis_db)
        redis_password = work_dict.get('redis_password', redis_password)
        verbose = work_dict.get('verbose_td', verbose)

    if verbose:
        log.info(f'{label} - {df_str} - start - redis_key={redis_key} '
                 f's3_key={s3_key}')

    exp_date_str = None
    puts_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=redis_password,
            key=redis_key,
            decompress_df=True)
        status = redis_rec['status']
        if verbose:
            log.info(f'{label} - {df_str} redis get data key={redis_key} '
                     f'status={ae_consts.get_status(status=status)}')
        if status == ae_consts.SUCCESS:
            # fixed: the second branch is now an elif so a found
            # 'tdputs' payload is not overwritten by the else branch
            puts_json = None
            if 'tdputs' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['tdputs']
            elif 'puts' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['puts']
            else:
                puts_json = redis_rec['rec']['data']
            if not puts_json:
                return ae_consts.SUCCESS, pd.DataFrame([])
            if verbose:
                log.info(f'{label} - {df_str} redis convert puts to df')
            try:
                puts_df = pd.read_json(
                    puts_json,
                    orient='records')
                if len(puts_df.index) == 0:
                    return ae_consts.SUCCESS, pd.DataFrame([])
                if 'date' not in puts_df:
                    log.debug('failed to find date column in TD puts '
                              f'df={puts_df} len={len(puts_df.index)}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                # fixed: assign the sorted dataframe back
                puts_df = puts_df.sort_values(by=['date', 'strike'])
                """
                for i, r in puts_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in puts_df:
                        puts_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            puts_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = puts_df['exp_date'].iloc[-1]
                puts_df['date'] = puts_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)
            except Exception:
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no puts df found')
                return ae_consts.SUCCESS, pd.DataFrame([])
            # end of try/ex to convert to df
            if verbose:
                log.info(f'{label} - {df_str} redis_key={redis_key} '
                         f'puts={len(puts_df.index)} exp_date={exp_date_str}')
        else:
            if verbose:
                log.info(f'{label} - {df_str} did not find valid redis '
                         f'option puts in redis_key={redis_key} '
                         f'status={ae_consts.get_status(status=status)}')
    except Exception as e:
        if verbose:
            log.error(
                f'{label} - {df_str} - ds_id={ds_id} failed getting option '
                f'puts from redis={redis_host}:{redis_port}@{redis_db} '
                f'key={redis_key} ex={e}')
        return ae_consts.ERR, pd.DataFrame([])
    # end of try/ex extract from redis

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
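# A follow-on sketch for extract_option_puts_dataset showing how a
# caller can branch on the (status, dataframe) tuple contract
# documented above. It assumes the module-level names this function
# already uses (ae_consts); the ticker and date are illustrative.
status, puts_df = extract_option_puts_dataset(
    ticker='SPY',
    date='2019-02-15',
    verbose=True)
if status == ae_consts.SUCCESS and len(puts_df.index) > 0:
    # rows are sorted by date and strike before scrubbing
    print(puts_df[['date', 'strike']].tail())
else:
    print('no cached TD puts - '
          f'status={ae_consts.get_status(status=status)}')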