def test_integration_extract_quote_dataset(self):
    """test_integration_extract_quote_dataset"""
    if ev('INT_TESTS', '0') == '0':
        return
    ticker = 'NFLX'
    label = 'IEX quote dataset'
    # build dataset cache dictionary
    work = get_ds_dict(
        ticker=ticker,
        label=label)
    status, df = extract_quote_dataset(
        work_dict=work)
    if status == SUCCESS:
        self.assertIsNotNone(df)
        self.debug_df(df=df)
    else:
        log.critical(
            '{} is missing in redis '
            'for ticker={} status={}'.format(
                label,
                work['ticker'],
                get_status(status=status)))
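# A hedged standalone sketch of the flow the integration test above
# exercises: outside of unittest the same helpers can be called directly,
# assuming the NFLX quote dataset was already fetched and cached in redis
# (the INT_TESTS gate only applies inside the test suite).
work = get_ds_dict(ticker='NFLX', label='IEX quote dataset')
status, df = extract_quote_dataset(work_dict=work)
if status == SUCCESS:
    print(df.head())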
def fetch(
        ticker=None,
        tickers=None,
        fetch_mode=None,
        iex_datasets=None,
        redis_enabled=True,
        redis_address=None,
        redis_db=None,
        redis_password=None,
        redis_expire=None,
        s3_enabled=True,
        s3_address=None,
        s3_bucket=None,
        s3_access_key=None,
        s3_secret_key=None,
        s3_region_name=None,
        s3_secure=False,
        celery_disabled=True,
        broker_url=None,
        result_backend=None,
        label=None,
        verbose=False):
    """fetch

    Fetch all supported datasets for a stock ``ticker`` or a list of
    ``tickers`` and return a dictionary. Once run, the datasets will all
    be cached in Redis and archived in Minio (S3) by default.

    Python example:

    .. code-block:: python

        from analysis_engine.fetch import fetch
        d = fetch(ticker='NFLX')
        print(d)
        for k in d['NFLX']:
            print('dataset key: {}'.format(k))

    By default, it synchronously automates:

    - fetching all datasets
    - caching all datasets in Redis
    - archiving all datasets in Minio (S3)
    - returning all datasets in a single dictionary

    This was created to reduce the amount of typing in Jupyter
    notebooks. It can also be set up for use with a distributed engine
    using the optional arguments, depending on your connectivity
    requirements.

    .. note:: Please ensure Redis and Minio are running
        before trying to extract tickers

    **Stock tickers to fetch**

    :param ticker: single stock ticker/symbol/ETF to fetch
    :param tickers: optional - list of tickers to fetch

    **(Optional) Data sources, datafeeds and datasets to gather**

    :param fetch_mode: data sources - default is ``all`` (both IEX
        and Yahoo), ``iex`` for only IEX, ``yahoo`` for only Yahoo.
    :param iex_datasets: list of strings for gathering specific `IEX
        datasets <https://iextrading.com/developer/docs/#stocks>`__
        which are set as consts: ``analysis_engine.iex.consts.FETCH_*``.

    **(Optional) Redis connectivity arguments**

    :param redis_enabled: bool - toggle for auto-caching all datasets
        in Redis (default is ``True``)
    :param redis_address: Redis connection string format: ``host:port``
        (default is ``localhost:6379``)
    :param redis_db: Redis db to use (default is ``0``)
    :param redis_password: optional - Redis password (default is ``None``)
    :param redis_expire: optional - Redis expire value (default is ``None``)

    **(Optional) Minio (S3) connectivity arguments**

    :param s3_enabled: bool - toggle for auto-archiving on Minio (S3)
        (default is ``True``)
    :param s3_address: Minio S3 connection string format: ``host:port``
        (default is ``localhost:9000``)
    :param s3_bucket: S3 Bucket for storing the artifacts
        (default is ``dev``) which should be viewable on a browser:
        http://localhost:9000/minio/dev/
    :param s3_access_key: S3 Access key (default is ``trexaccesskey``)
    :param s3_secret_key: S3 Secret key (default is ``trex123321``)
    :param s3_region_name: S3 region name (default is ``us-east-1``)
    :param s3_secure: Transmit using tls encryption (default is ``False``)

    **(Optional) Celery worker broker connectivity arguments**

    :param celery_disabled: bool - toggle synchronous mode or publish
        to an engine connected to the `Celery broker and backend
        <https://github.com/celery/celery#transports-and-backends>`__
        (default is ``True`` - synchronous mode without an engine
        or need for a broker or backend for Celery)
    :param broker_url: Celery broker url
        (default is ``redis://0.0.0.0:6379/13``)
    :param result_backend: Celery backend url
        (default is ``redis://0.0.0.0:6379/14``)
    :param label: tracking log label

    **(Optional) Debugging**

    :param verbose: bool - show fetch warnings
        and other debug logging (default is False)

    **Supported environment variables**

    ::

        export REDIS_ADDRESS="localhost:6379"
        export REDIS_DB="0"
        export S3_ADDRESS="localhost:9000"
        export S3_BUCKET="dev"
        export AWS_ACCESS_KEY_ID="trexaccesskey"
        export AWS_SECRET_ACCESS_KEY="trex123321"
        export AWS_DEFAULT_REGION="us-east-1"
        export S3_SECURE="0"
        export WORKER_BROKER_URL="redis://0.0.0.0:6379/13"
        export WORKER_BACKEND_URL="redis://0.0.0.0:6379/14"
    """
    rec = {}
    extract_records = []

    use_tickers = tickers
    if ticker:
        use_tickers = [ticker]
    else:
        if not use_tickers:
            use_tickers = []

    default_iex_datasets = [
        'daily',
        'minute',
        'quote',
        'stats',
        'peers',
        'news',
        'financials',
        'earnings',
        'dividends',
        'company'
    ]

    if not iex_datasets:
        iex_datasets = default_iex_datasets
    if not fetch_mode:
        fetch_mode = 'all'

    if redis_enabled:
        if not redis_address:
            redis_address = os.getenv('REDIS_ADDRESS', 'localhost:6379')
        if not redis_password:
            redis_password = os.getenv('REDIS_PASSWORD', None)
        if not redis_db:
            redis_db = int(os.getenv('REDIS_DB', '0'))
        if not redis_expire:
            redis_expire = os.getenv('REDIS_EXPIRE', None)
    if s3_enabled:
        if not s3_address:
            s3_address = os.getenv('S3_ADDRESS', 'localhost:9000')
        if not s3_access_key:
            s3_access_key = os.getenv('AWS_ACCESS_KEY_ID', 'trexaccesskey')
        if not s3_secret_key:
            s3_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'trex123321')
        if not s3_region_name:
            s3_region_name = os.getenv('AWS_DEFAULT_REGION', 'us-east-1')
        if not s3_secure:
            s3_secure = os.getenv('S3_SECURE', '0') == '1'
        if not s3_bucket:
            s3_bucket = os.getenv('S3_BUCKET', 'dev')
    if not broker_url:
        broker_url = os.getenv(
            'WORKER_BROKER_URL', 'redis://0.0.0.0:6379/13')
    if not result_backend:
        result_backend = os.getenv(
            'WORKER_BACKEND_URL', 'redis://0.0.0.0:6379/14')

    if not label:
        label = 'get-latest'

    num_tickers = len(use_tickers)
    last_close_str = get_last_close_str()

    if iex_datasets:
        log.info(
            '{} - getting latest for tickers={} iex={}'.format(
                label, num_tickers, json.dumps(iex_datasets)))
    else:
        log.info(
            '{} - getting latest for tickers={}'.format(
                label, num_tickers))

    for ticker in use_tickers:
        ticker_key = '{}_{}'.format(ticker, last_close_str)

        fetch_req = build_get_new_pricing_request()
        fetch_req['base_key'] = ticker_key
        fetch_req['celery_disabled'] = celery_disabled
        fetch_req['ticker'] = ticker
        fetch_req['label'] = label
        fetch_req['fetch_mode'] = fetch_mode
        fetch_req['iex_datasets'] = iex_datasets
        fetch_req['s3_enabled'] = s3_enabled
        fetch_req['s3_bucket'] = s3_bucket
        fetch_req['s3_address'] = s3_address
        fetch_req['s3_secure'] = s3_secure
        fetch_req['s3_region_name'] = s3_region_name
        fetch_req['s3_access_key'] = s3_access_key
        fetch_req['s3_secret_key'] = s3_secret_key
        fetch_req['s3_key'] = ticker_key
        fetch_req['redis_enabled'] = redis_enabled
        fetch_req['redis_address'] = redis_address
        fetch_req['redis_password'] = redis_password
        fetch_req['redis_db'] = redis_db
        fetch_req['redis_key'] = ticker_key
        fetch_req['redis_expire'] = redis_expire

        log.info(
            '{} - fetching ticker={} last_close={} '
            'redis_address={} s3_address={}'.format(
                label,
                ticker,
                last_close_str,
                fetch_req['redis_address'],
                fetch_req['s3_address']))

        fetch_res = price_utils.run_get_new_pricing_data(
            work_dict=fetch_req)
        if fetch_res['status'] == SUCCESS:
            log.info(
                '{} - fetched ticker={} '
                'preparing for extraction'.format(
                    label, ticker))
            extract_req = fetch_req
            extract_records.append(extract_req)
        else:
            log.warning(
                '{} - failed getting ticker={} data '
                'status={} err={}'.format(
                    label,
                    ticker,
                    get_status(status=fetch_res['status']),
                    fetch_res['err']))
        # end of if worked or not
    # end of for all tickers to fetch

    """
    Extract Datasets
    """

    iex_daily_status = FAILED
    iex_minute_status = FAILED
    iex_quote_status = FAILED
    iex_stats_status = FAILED
    iex_peers_status = FAILED
    iex_news_status = FAILED
    iex_financials_status = FAILED
    iex_earnings_status = FAILED
    iex_dividends_status = FAILED
    iex_company_status = FAILED
    yahoo_news_status = FAILED
    yahoo_options_status = FAILED
    yahoo_pricing_status = FAILED

    iex_daily_df = None
    iex_minute_df = None
    iex_quote_df = None
    iex_stats_df = None
    iex_peers_df = None
    iex_news_df = None
    iex_financials_df = None
    iex_earnings_df = None
    iex_dividends_df = None
    iex_company_df = None
    yahoo_option_calls_df = None
    yahoo_option_puts_df = None
    yahoo_pricing_df = None
    yahoo_news_df = None

    extract_iex = True
    if fetch_mode not in ['all', 'iex']:
        extract_iex = False

    extract_yahoo = True
    if fetch_mode not in ['all', 'yahoo']:
        extract_yahoo = False

    for service_dict in extract_records:
        ticker_data = {}
        ticker = service_dict['ticker']

        extract_req = get_ds_dict(
            ticker=ticker,
            base_key=service_dict.get('base_key', None),
            ds_id=label,
            service_dict=service_dict)

        if 'daily' in iex_datasets or extract_iex:
            iex_daily_status, iex_daily_df = \
                iex_extract_utils.extract_daily_dataset(
                    extract_req)
            if iex_daily_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_daily={}'.format(ticker))
        if 'minute' in iex_datasets or extract_iex:
            iex_minute_status, iex_minute_df = \
                iex_extract_utils.extract_minute_dataset(
                    extract_req)
            if iex_minute_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_minute={}'.format(ticker))
        if 'quote' in iex_datasets or extract_iex:
            iex_quote_status, iex_quote_df = \
                iex_extract_utils.extract_quote_dataset(
                    extract_req)
            if iex_quote_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_quote={}'.format(ticker))
        if 'stats' in iex_datasets or extract_iex:
            iex_stats_status, iex_stats_df = \
                iex_extract_utils.extract_stats_dataset(
                    extract_req)
            if iex_stats_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_stats={}'.format(ticker))
        if 'peers' in iex_datasets or extract_iex:
            iex_peers_status, iex_peers_df = \
                iex_extract_utils.extract_peers_dataset(
                    extract_req)
            if iex_peers_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_peers={}'.format(ticker))
        if 'news' in iex_datasets or extract_iex:
            iex_news_status, iex_news_df = \
                iex_extract_utils.extract_news_dataset(
                    extract_req)
            if iex_news_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_news={}'.format(ticker))
        if 'financials' in iex_datasets or extract_iex:
            iex_financials_status, iex_financials_df = \
                iex_extract_utils.extract_financials_dataset(
                    extract_req)
            if iex_financials_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_financials={}'.format(ticker))
        if 'earnings' in iex_datasets or extract_iex:
            iex_earnings_status, iex_earnings_df = \
                iex_extract_utils.extract_earnings_dataset(
                    extract_req)
            if iex_earnings_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_earnings={}'.format(ticker))
        if 'dividends' in iex_datasets or extract_iex:
            iex_dividends_status, iex_dividends_df = \
                iex_extract_utils.extract_dividends_dataset(
                    extract_req)
            if iex_dividends_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_dividends={}'.format(ticker))
        if 'company' in iex_datasets or extract_iex:
            iex_company_status, iex_company_df = \
                iex_extract_utils.extract_company_dataset(
                    extract_req)
            if iex_company_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_company={}'.format(ticker))
        # end of iex extracts

        if extract_yahoo:
            yahoo_options_status, yahoo_option_calls_df = \
                yahoo_extract_utils.extract_option_calls_dataset(
                    extract_req)
            yahoo_options_status, yahoo_option_puts_df = \
                yahoo_extract_utils.extract_option_puts_dataset(
                    extract_req)
            if yahoo_options_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch yahoo_options={}'.format(ticker))
            yahoo_pricing_status, yahoo_pricing_df = \
                yahoo_extract_utils.extract_pricing_dataset(
                    extract_req)
            if yahoo_pricing_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch yahoo_pricing={}'.format(ticker))
            yahoo_news_status, yahoo_news_df = \
                yahoo_extract_utils.extract_yahoo_news_dataset(
                    extract_req)
            if yahoo_news_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch yahoo_news={}'.format(ticker))
        # end of yahoo extracts

        ticker_data['daily'] = iex_daily_df
        ticker_data['minute'] = iex_minute_df
        ticker_data['quote'] = iex_quote_df
        ticker_data['stats'] = iex_stats_df
        ticker_data['peers'] = iex_peers_df
        ticker_data['news1'] = iex_news_df
        ticker_data['financials'] = iex_financials_df
        ticker_data['earnings'] = iex_earnings_df
        ticker_data['dividends'] = iex_dividends_df
        ticker_data['company'] = iex_company_df
        ticker_data['calls'] = yahoo_option_calls_df
        ticker_data['puts'] = yahoo_option_puts_df
        ticker_data['pricing'] = yahoo_pricing_df
        ticker_data['news'] = yahoo_news_df

        rec[ticker] = ticker_data
    # end of for service_dict in extract_records

    return rec
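# Hedged usage sketch for fetch(): gather the default IEX datasets for two
# tickers against the documented local Redis/Minio defaults; the printed
# keys mirror the ticker_data mapping assembled above.
from analysis_engine.fetch import fetch

res = fetch(tickers=['SPY', 'NFLX'], fetch_mode='iex')
for sym, node in res.items():
    # a dataset value is None when nothing was cached for that key
    print(sym, sorted(k for k, v in node.items() if v is not None))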
def run_algo(
        ticker=None,
        tickers=None,
        algo=None,  # optional derived ``analysis_engine.algo.Algo`` instance
        balance=None,  # float starting base capital
        commission=None,  # float for single trade commission for buy or sell
        start_date=None,  # string YYYY-MM-DD HH:MM:SS
        end_date=None,  # string YYYY-MM-DD HH:MM:SS
        datasets=None,  # string list of identifiers
        num_owned_dict=None,  # not supported
        cache_freq='daily',  # 'minute' not supported
        auto_fill=True,
        load_config=None,
        report_config=None,
        history_config=None,
        extract_config=None,
        use_key=None,
        extract_mode='all',
        iex_datasets=None,
        redis_enabled=True,
        redis_address=None,
        redis_db=None,
        redis_password=None,
        redis_expire=None,
        redis_key=None,
        s3_enabled=True,
        s3_address=None,
        s3_bucket=None,
        s3_access_key=None,
        s3_secret_key=None,
        s3_region_name=None,
        s3_secure=False,
        s3_key=None,
        celery_disabled=True,
        broker_url=None,
        result_backend=None,
        label=None,
        name=None,
        timeseries=None,
        trade_strategy=None,
        verbose=False,
        publish_to_slack=True,
        publish_to_s3=True,
        publish_to_redis=True,
        extract_datasets=None,
        config_file=None,
        config_dict=None,
        version=1,
        raise_on_err=True,
        **kwargs):
    """run_algo

    Run an algorithm with steps:

    1) Extract redis keys between dates
    2) Compile a data pipeline dictionary (call it ``data``)
    3) Call algorithm's ``myalgo.handle_data(data=data)``

    .. note:: If no ``algo`` is set, the
        ``analysis_engine.algo.BaseAlgo`` algorithm is used.

    .. note:: Please ensure Redis and Minio are running
        before trying to extract tickers

    **Stock tickers to extract**

    :param ticker: single stock ticker/symbol/ETF to extract
    :param tickers: optional - list of tickers to extract
    :param use_key: optional - extract historical key from Redis

    **Algo Configuration**

    :param algo: derived instance of ``analysis_engine.algo.Algo`` object
    :param balance: optional - float balance parameter
        can also be set on the ``algo`` object if not set on the args
    :param commission: float for single trade commission for buy
        or sell. can also be set on the ``algo`` object
    :param start_date: string ``YYYY-MM-DD_HH:MM:SS`` cache value
    :param end_date: string ``YYYY-MM-DD_HH:MM:SS`` cache value
    :param datasets: list of strings that are ``iex`` or ``yahoo``
        datasets that are cached.
    :param cache_freq: optional - depending on if you are running data feeds
        on a ``daily`` cron (default) vs every ``minute`` (or faster)
    :param num_owned_dict: not supported yet
    :param auto_fill: optional - boolean for auto filling
        buy/sell orders for backtesting (default is ``True``)
    :param trading_calendar: ``trading_calendar.TradingCalendar``
        object, by default
        ``analysis_engine.calendars.always_open.AlwaysOpen``
        trading calendar
        # TradingCalendar by ``TFSExchangeCalendar``
    :param config_file: path to a json file containing custom algorithm
        object member values (like indicator configuration and predict
        future date units ahead for a backtest)
    :param config_dict: optional - dictionary that can be passed to
        derived class implementations of:
        ``def load_from_config(config_dict=config_dict)``

    **Timeseries**

    :param timeseries: optional - string to set ``day`` or ``minute``
        backtesting or live trading
        (default is ``minute``)

    **Trading Strategy**

    :param trade_strategy: optional - string to set the type of
        ``Trading Strategy`` for backtesting or live trading
        (default is ``count``)

    **Algorithm Dataset Loading, Extracting, Reporting
    and Trading History arguments**

    :param load_config: optional - dictionary for setting member variables
        to load an algorithm-ready dataset from a file, s3 or redis
    :param report_config: optional - dictionary for setting member variables
        to publish an algo ``trading performance report`` to s3, redis,
        a file or slack
    :param history_config: optional - dictionary for setting member variables
        to publish an algo ``trade history`` to s3, redis, a file or slack
    :param extract_config: optional - dictionary for setting member variables
        to publish an algorithm-ready dataset to s3, redis, a file or slack

    **(Optional) Data sources, datafeeds and datasets to gather**

    :param iex_datasets: list of strings for gathering specific `IEX
        datasets <https://iextrading.com/developer/docs/#stocks>`__
        which are set as consts: ``analysis_engine.iex.consts.FETCH_*``.

    **(Optional) Redis connectivity arguments**

    :param redis_enabled: bool - toggle for auto-caching all datasets
        in Redis (default is ``True``)
    :param redis_address: Redis connection string format: ``host:port``
        (default is ``localhost:6379``)
    :param redis_db: Redis db to use (default is ``0``)
    :param redis_password: optional - Redis password (default is ``None``)
    :param redis_expire: optional - Redis expire value (default is ``None``)
    :param redis_key: optional - redis key not used (default is ``None``)

    **(Optional) Minio (S3) connectivity arguments**

    :param s3_enabled: bool - toggle for auto-archiving on Minio (S3)
        (default is ``True``)
    :param s3_address: Minio S3 connection string format: ``host:port``
        (default is ``localhost:9000``)
    :param s3_bucket: S3 Bucket for storing the artifacts
        (default is ``dev``) which should be viewable on a browser:
        http://localhost:9000/minio/dev/
    :param s3_access_key: S3 Access key (default is ``trexaccesskey``)
    :param s3_secret_key: S3 Secret key (default is ``trex123321``)
    :param s3_region_name: S3 region name (default is ``us-east-1``)
    :param s3_secure: Transmit using tls encryption (default is ``False``)
    :param s3_key: optional s3 key not used (default is ``None``)

    **(Optional) Celery worker broker connectivity arguments**

    :param celery_disabled: bool - toggle synchronous mode or publish
        to an engine connected to the `Celery broker and backend
        <https://github.com/celery/celery#transports-and-backends>`__
        (default is ``True`` - synchronous mode without an engine
        or need for a broker or backend for Celery)
    :param broker_url: Celery broker url
        (default is ``redis://0.0.0.0:6379/13``)
    :param result_backend: Celery backend url
        (default is ``redis://0.0.0.0:6379/14``)
    :param label: tracking log label
    :param publish_to_slack: optional - boolean for
        publishing to slack (coming soon)
    :param publish_to_s3: optional - boolean for
        publishing to s3 (coming soon)
    :param publish_to_redis: optional - boolean for
        publishing to redis (coming soon)

    **(Optional) Debugging**

    :param verbose: bool - show extract warnings
        and other debug logging (default is False)
    :param raise_on_err: optional - boolean for unittests and developing
        algorithms with the ``analysis_engine.run_algo.run_algo`` helper.
        When set to ``True``, exceptions are raised to the
        calling functions
    :param kwargs: keyword arguments dictionary
    """

    # dictionary structure with a list sorted on: ascending dates
    # algo_data_req[ticker][list][dataset] = pd.DataFrame
    algo_data_req = {}
    extract_requests = []
    return_algo = False  # return created algo objects for use by caller
    rec = {}
    msg = None

    use_tickers = tickers
    use_balance = balance
    use_commission = commission

    if ticker:
        use_tickers = [ticker]
    else:
        if not use_tickers:
            use_tickers = []

    # if these are not set as args, but the algo object
    # has them, use them instead:
    if algo:
        if len(use_tickers) == 0:
            use_tickers = algo.get_tickers()
        if not use_balance:
            use_balance = algo.get_balance()
        if not use_commission:
            use_commission = algo.get_commission()

    default_iex_datasets = [
        'daily',
        'minute',
        'quote',
        'stats',
        'peers',
        'news',
        'financials',
        'earnings',
        'dividends',
        'company'
    ]

    if not iex_datasets:
        iex_datasets = default_iex_datasets

    if redis_enabled:
        if not redis_address:
            redis_address = os.getenv('REDIS_ADDRESS', 'localhost:6379')
        if not redis_password:
            redis_password = os.getenv('REDIS_PASSWORD', None)
        if not redis_db:
            redis_db = int(os.getenv('REDIS_DB', '0'))
        if not redis_expire:
            redis_expire = os.getenv('REDIS_EXPIRE', None)
    if s3_enabled:
        if not s3_address:
            s3_address = os.getenv('S3_ADDRESS', 'localhost:9000')
        if not s3_access_key:
            s3_access_key = os.getenv('AWS_ACCESS_KEY_ID', 'trexaccesskey')
        if not s3_secret_key:
            s3_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'trex123321')
        if not s3_region_name:
            s3_region_name = os.getenv('AWS_DEFAULT_REGION', 'us-east-1')
        if not s3_secure:
            s3_secure = os.getenv('S3_SECURE', '0') == '1'
        if not s3_bucket:
            s3_bucket = os.getenv('S3_BUCKET', 'dev')
    if not broker_url:
        broker_url = os.getenv(
            'WORKER_BROKER_URL', 'redis://0.0.0.0:6379/11')
    if not result_backend:
        result_backend = os.getenv(
            'WORKER_BACKEND_URL', 'redis://0.0.0.0:6379/12')

    if not label:
        label = 'run-algo'

    num_tickers = len(use_tickers)
    last_close_str = ae_utils.get_last_close_str()

    if iex_datasets:
        if verbose:
            log.info('{} - tickers={} iex={}'.format(
                label, num_tickers, json.dumps(iex_datasets)))
    else:
        if verbose:
            log.info('{} - tickers={}'.format(label, num_tickers))

    ticker_key = use_key
    if not ticker_key:
        ticker_key = '{}_{}'.format(ticker, last_close_str)

    if not algo:
        algo = base_algo.BaseAlgo(
            ticker=None,
            tickers=use_tickers,
            balance=use_balance,
            commission=use_commission,
            config_dict=config_dict,
            name=label,
            auto_fill=auto_fill,
            timeseries=timeseries,
            trade_strategy=trade_strategy,
            publish_to_slack=publish_to_slack,
            publish_to_s3=publish_to_s3,
            publish_to_redis=publish_to_redis,
            raise_on_err=raise_on_err)
        return_algo = True
        # the algo object is stored
        # in the result at: res['rec']['algo']

    if not algo:
        msg = ('{} - missing algo object'.format(label))
        log.error(msg)
        return build_result.build_result(
            status=ae_consts.EMPTY, err=msg, rec=rec)

    if raise_on_err:
        log.debug('{} - enabling algo exception raises'.format(label))
        algo.raise_on_err = True

    common_vals = {}
    common_vals['base_key'] = ticker_key
    common_vals['celery_disabled'] = celery_disabled
    common_vals['ticker'] = ticker
    common_vals['label'] = label
    common_vals['iex_datasets'] = iex_datasets
    common_vals['s3_enabled'] = s3_enabled
    common_vals['s3_bucket'] = s3_bucket
    common_vals['s3_address'] = s3_address
    common_vals['s3_secure'] = s3_secure
    common_vals['s3_region_name'] = s3_region_name
    common_vals['s3_access_key'] = s3_access_key
    common_vals['s3_secret_key'] = s3_secret_key
    common_vals['s3_key'] = ticker_key
    common_vals['redis_enabled'] = redis_enabled
    common_vals['redis_address'] = redis_address
    common_vals['redis_password'] = redis_password
    common_vals['redis_db'] = redis_db
    common_vals['redis_key'] = ticker_key
    common_vals['redis_expire'] = redis_expire

    """
    Extract Datasets
    """

    iex_daily_status = ae_consts.FAILED
    iex_minute_status = ae_consts.FAILED
    iex_quote_status = ae_consts.FAILED
    iex_stats_status = ae_consts.FAILED
    iex_peers_status = ae_consts.FAILED
    iex_news_status = ae_consts.FAILED
    iex_financials_status = ae_consts.FAILED
    iex_earnings_status = ae_consts.FAILED
    iex_dividends_status = ae_consts.FAILED
    iex_company_status = ae_consts.FAILED
    yahoo_news_status = ae_consts.FAILED
    yahoo_options_status = ae_consts.FAILED
    yahoo_pricing_status = ae_consts.FAILED
    td_calls_status = ae_consts.FAILED
    td_puts_status = ae_consts.FAILED

    iex_daily_df = None
    iex_minute_df = None
    iex_quote_df = None
    iex_stats_df = None
    iex_peers_df = None
    iex_news_df = None
    iex_financials_df = None
    iex_earnings_df = None
    iex_dividends_df = None
    iex_company_df = None
    yahoo_option_calls_df = None
    yahoo_option_puts_df = None
    yahoo_pricing_df = None
    yahoo_news_df = None
    td_calls_df = None
    td_puts_df = None

    use_start_date_str = start_date
    use_end_date_str = end_date
    last_close_date = ae_utils.last_close()
    end_date_val = None

    cache_freq_fmt = ae_consts.COMMON_TICK_DATE_FORMAT

    if not use_end_date_str:
        use_end_date_str = last_close_date.strftime(cache_freq_fmt)

    end_date_val = ae_utils.get_date_from_str(
        date_str=use_end_date_str, fmt=cache_freq_fmt)
    start_date_val = None

    if not use_start_date_str:
        start_date_val = end_date_val - datetime.timedelta(days=60)
        use_start_date_str = start_date_val.strftime(cache_freq_fmt)
    else:
        start_date_val = datetime.datetime.strptime(
            use_start_date_str, ae_consts.COMMON_TICK_DATE_FORMAT)

    total_dates = (end_date_val - start_date_val).days

    if end_date_val < start_date_val:
        msg = ('{} - invalid dates - start_date={} is after '
               'end_date={}'.format(label, start_date_val, end_date_val))
        raise Exception(msg)

    log.debug('{} - days={} start={} end={} datasets={}'.format(
        label, total_dates, use_start_date_str, use_end_date_str, datasets))

    for ticker in use_tickers:
        req = algo_utils.build_algo_request(
            ticker=ticker,
            use_key=use_key,
            start_date=use_start_date_str,
            end_date=use_end_date_str,
            datasets=datasets,
            balance=use_balance,
            cache_freq=cache_freq,
            timeseries=timeseries,
            trade_strategy=trade_strategy,
            label=label)
        ticker_key = '{}_{}'.format(ticker, last_close_str)
        common_vals['ticker'] = ticker
        common_vals['base_key'] = ticker_key
        common_vals['redis_key'] = ticker_key
        common_vals['s3_key'] = ticker_key

        for date_key in req['extract_datasets']:
            date_req = api_requests.get_ds_dict(
                ticker=ticker,
                base_key=date_key,
                ds_id=label,
                service_dict=common_vals)
            node_date_key = date_key.replace('{}_'.format(ticker), '')
            extract_requests.append({
                'id': date_key,
                'ticker': ticker,
                'date_key': date_key,
                'date': node_date_key,
                'req': date_req
            })
    # end of for all ticker in use_tickers

    extract_iex = True
    if extract_mode not in ['all', 'iex']:
        extract_iex = False

    extract_yahoo = True
    if extract_mode not in ['all', 'yahoo']:
        extract_yahoo = False

    extract_td = True
    if extract_mode not in ['all', 'td']:
        extract_td = False

    first_extract_date = None
    last_extract_date = None
    total_extract_requests = len(extract_requests)
    cur_idx = 1
    for idx, extract_node in enumerate(extract_requests):

        iex_daily_df = None
        iex_minute_df = None
        iex_quote_df = None
        iex_stats_df = None
        iex_peers_df = None
        iex_news_df = None
        iex_financials_df = None
        iex_earnings_df = None
        iex_dividends_df = None
        iex_company_df = None
        yahoo_option_calls_df = None
        yahoo_option_puts_df = None
        yahoo_pricing_df = None
        yahoo_news_df = None
        td_calls_df = None
        td_puts_df = None

        extract_ticker = extract_node['ticker']
        extract_date = extract_node['date']
        extract_req = extract_node['req']
        dataset_node_id = extract_node['id']

        if not first_extract_date:
            first_extract_date = extract_date
        last_extract_date = extract_date

        dataset_id = dataset_node_id
        percent_label = ('{} ticker={} date={} {} {}/{}'.format(
            label,
            extract_ticker,
            extract_date,
            ae_consts.get_percent_done(
                progress=cur_idx, total=total_extract_requests),
            idx,
            total_extract_requests))
        log.debug('{} - extract - start'.format(percent_label))

        if 'daily' in iex_datasets or extract_iex:
            iex_daily_status, iex_daily_df = \
                iex_extract_utils.extract_daily_dataset(
                    extract_req)
            if iex_daily_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_daily={}'.format(ticker))
        if 'minute' in iex_datasets or extract_iex:
            iex_minute_status, iex_minute_df = \
                iex_extract_utils.extract_minute_dataset(
                    extract_req)
            if iex_minute_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_minute={}'.format(ticker))
        if 'quote' in iex_datasets or extract_iex:
            iex_quote_status, iex_quote_df = \
                iex_extract_utils.extract_quote_dataset(
                    extract_req)
            if iex_quote_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_quote={}'.format(ticker))
        if 'stats' in iex_datasets or extract_iex:
            iex_stats_status, iex_stats_df = \
                iex_extract_utils.extract_stats_dataset(
                    extract_req)
            if iex_stats_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_stats={}'.format(ticker))
        if 'peers' in iex_datasets or extract_iex:
            iex_peers_status, iex_peers_df = \
                iex_extract_utils.extract_peers_dataset(
                    extract_req)
            if iex_peers_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_peers={}'.format(ticker))
        if 'news' in iex_datasets or extract_iex:
            iex_news_status, iex_news_df = \
                iex_extract_utils.extract_news_dataset(
                    extract_req)
            if iex_news_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_news={}'.format(ticker))
        if 'financials' in iex_datasets or extract_iex:
            iex_financials_status, iex_financials_df = \
                iex_extract_utils.extract_financials_dataset(
                    extract_req)
            if iex_financials_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_financials={}'.format(ticker))
        if 'earnings' in iex_datasets or extract_iex:
            iex_earnings_status, iex_earnings_df = \
                iex_extract_utils.extract_earnings_dataset(
                    extract_req)
            if iex_earnings_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_earnings={}'.format(ticker))
        if 'dividends' in iex_datasets or extract_iex:
            iex_dividends_status, iex_dividends_df = \
                iex_extract_utils.extract_dividends_dataset(
                    extract_req)
            if iex_dividends_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_dividends={}'.format(ticker))
        if 'company' in iex_datasets or extract_iex:
            iex_company_status, iex_company_df = \
                iex_extract_utils.extract_company_dataset(
                    extract_req)
            if iex_company_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_company={}'.format(ticker))
        # end of iex extracts

        if extract_yahoo:
            yahoo_options_status, yahoo_option_calls_df = \
                yahoo_extract_utils.extract_option_calls_dataset(
                    extract_req)
            yahoo_options_status, yahoo_option_puts_df = \
                yahoo_extract_utils.extract_option_puts_dataset(
                    extract_req)
            if yahoo_options_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract yahoo_options={}'.format(ticker))
            yahoo_pricing_status, yahoo_pricing_df = \
                yahoo_extract_utils.extract_pricing_dataset(
                    extract_req)
            if yahoo_pricing_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract yahoo_pricing={}'.format(ticker))
            yahoo_news_status, yahoo_news_df = \
                yahoo_extract_utils.extract_yahoo_news_dataset(
                    extract_req)
            if yahoo_news_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract yahoo_news={}'.format(ticker))
        # end of yahoo extracts

        if extract_td:
            """
            Debug by setting:

            extract_req['verbose_td'] = True
            """
            td_calls_status, td_calls_df = \
                td_extract_utils.extract_option_calls_dataset(
                    extract_req)
            if td_calls_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract tdcalls={}'.format(ticker))
            td_puts_status, td_puts_df = \
                td_extract_utils.extract_option_puts_dataset(
                    extract_req)
            if td_puts_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract tdputs={}'.format(ticker))
        # end of td extracts

        # map extracted data to DEFAULT_SERIALIZED_DATASETS
        ticker_data = {}
        ticker_data['daily'] = iex_daily_df
        ticker_data['minute'] = iex_minute_df
        ticker_data['quote'] = iex_quote_df
        ticker_data['stats'] = iex_stats_df
        ticker_data['peers'] = iex_peers_df
        ticker_data['news1'] = iex_news_df
        ticker_data['financials'] = iex_financials_df
        ticker_data['earnings'] = iex_earnings_df
        ticker_data['dividends'] = iex_dividends_df
        ticker_data['company'] = iex_company_df
        ticker_data['calls'] = yahoo_option_calls_df
        ticker_data['puts'] = yahoo_option_puts_df
        ticker_data['pricing'] = yahoo_pricing_df
        ticker_data['news'] = yahoo_news_df
        ticker_data['tdcalls'] = td_calls_df
        ticker_data['tdputs'] = td_puts_df

        if ticker not in algo_data_req:
            algo_data_req[ticker] = []

        algo_data_req[ticker].append({
            'id': dataset_id,  # id is currently the cache key in redis
            'date': extract_date,  # used to confirm dates in asc order
            'data': ticker_data
        })

        if verbose:
            log.info('extract - {} dataset={}'.format(
                percent_label, len(algo_data_req[ticker])))
        cur_idx += 1
    # end of for service_dict in extract_requests

    # this could be a separate celery task
    status = ae_consts.NOT_RUN
    if len(algo_data_req) == 0:
        msg = ('{} - nothing to test - no data found for tickers={} '
               'between {} and {}'.format(
                   label,
                   use_tickers,
                   first_extract_date,
                   last_extract_date))
        log.info(msg)
        return build_result.build_result(
            status=ae_consts.EMPTY, err=msg, rec=rec)

    # this could be a separate celery task
    try:
        if verbose:
            log.info('handle_data START - {} from {} to {}'.format(
                percent_label, first_extract_date, last_extract_date))
        algo.handle_data(data=algo_data_req)
        if verbose:
            log.info('handle_data END - {} from {} to {}'.format(
                percent_label, first_extract_date, last_extract_date))
    except Exception as e:
        msg = ('{} - algo={} encountered exception in handle_data '
               'tickers={} from {} to {} ex={}'.format(
                   percent_label,
                   algo.get_name(),
                   use_tickers,
                   first_extract_date,
                   last_extract_date,
                   e))
        if raise_on_err:
            if algo:
                log.error(
                    'algo failure report: algo={} handle_data() '
                    '{} and config={}'.format(
                        algo.get_name(),
                        algo.get_debug_msg(),
                        ae_consts.ppj(algo.config_dict)))
                try:
                    ind_obj = \
                        algo.get_indicator_process_last_indicator()
                    if ind_obj:
                        found_error_hint = False
                        if hasattr(ind_obj.use_df, 'to_json'):
                            if len(ind_obj.use_df.index) == 0:
                                log.critical(
                                    'indicator failure report for '
                                    'last module: '
                                    '{} indicator={} config={} dataset={} '
                                    'name_of_dataset={}'.format(
                                        ind_obj.get_path_to_module(),
                                        ind_obj.get_name(),
                                        ae_consts.ppj(ind_obj.get_config()),
                                        ind_obj.use_df,
                                        ind_obj.uses_data))
                                log.critical(
                                    '--------------------------------------'
                                    '--------------------------------------')
                                log.critical(
                                    'Please check if this indicator: '
                                    '{} '
                                    'supports Empty Dataframes: {}'.format(
                                        ind_obj.get_path_to_module(),
                                        ind_obj.use_df))
                                log.critical(
                                    '--------------------------------------'
                                    '--------------------------------------')
                                found_error_hint = True
                        # end of indicator error hints
                        if not found_error_hint:
                            log.critical(
                                'indicator failure report for '
                                'last module: '
                                '{} indicator={} config={} dataset={} '
                                'name_of_dataset={}'.format(
                                    ind_obj.get_path_to_module(),
                                    ind_obj.get_name(),
                                    ae_consts.ppj(ind_obj.get_config()),
                                    ind_obj.use_df,
                                    ind_obj.uses_data))
                except Exception as f:
                    log.critical(
                        'failed to pull indicator processor '
                        'last indicator for debugging '
                        'from ex={} with parsing ex={}'.format(e, f))
                # end of ignoring non-supported ways of creating
                # indicator processors
            log.error(msg)
            raise e
        else:
            log.error(msg)
            return build_result.build_result(
                status=ae_consts.ERR, err=msg, rec=rec)
    # end of try/ex

    # this could be a separate celery task
    try:
        if verbose:
            log.info('get_result START - {} from {} to {}'.format(
                percent_label, first_extract_date, last_extract_date))
        rec = algo.get_result()
        status = ae_consts.SUCCESS
        if verbose:
            log.info('get_result END - {} from {} to {}'.format(
                percent_label, first_extract_date, last_extract_date))
    except Exception as e:
        msg = ('{} - algo={} encountered exception in get_result '
               'tickers={} from {} to {} ex={}'.format(
                   percent_label,
                   algo.get_name(),
                   use_tickers,
                   first_extract_date,
                   last_extract_date,
                   e))
        if raise_on_err:
            if algo:
                log.error(
                    'algo={} failed in get_result with '
                    'debug_msg={}'.format(
                        algo.get_name(),
                        algo.get_debug_msg()))
            log.error(msg)
            raise e
        else:
            log.error(msg)
            return build_result.build_result(
                status=ae_consts.ERR, err=msg, rec=rec)
    # end of try/ex

    if return_algo:
        rec['algo'] = algo

    return build_result.build_result(status=status, err=msg, rec=rec)
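# Hedged usage sketch for run_algo(): backtest the default BaseAlgo over
# the implicit 60-day window computed above, using cached minute data.
# The result dictionary comes from build_result.build_result(), and the
# auto-created algo object is returned at res['rec']['algo'] as noted in
# the code.
res = run_algo(ticker='SPY', datasets=['minute'], balance=10000.00)
if res['status'] == ae_consts.SUCCESS:
    algo = res['rec'].get('algo')  # only set when run_algo created the algo
    print(algo.get_name())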
def build_dataset_node(
        ticker,
        datasets,
        date=None,
        service_dict=None,
        log_label=None,
        redis_enabled=True,
        redis_address=None,
        redis_db=None,
        redis_password=None,
        redis_expire=None,
        redis_key=None,
        s3_enabled=True,
        s3_address=None,
        s3_bucket=None,
        s3_access_key=None,
        s3_secret_key=None,
        s3_region_name=None,
        s3_secure=False,
        s3_key=None,
        verbose=False):
    """build_dataset_node

    Helper for building a dictionary of cached datasets from redis.
    The datasets should be built from the algorithm config's indicator
    ``uses_data`` fields, which default to ``minute`` data if not set.

    :param ticker: string ticker
    :param datasets: list of string dataset names to extract from redis
    :param date: optional - string datetime formatted ``YYYY-MM-DD``
        (default is last trading close date)
    :param service_dict: optional - dictionary for all service connectivity
        to Redis and Minio. If not set, the ``s3_*`` and ``redis_*``
        arguments will be used to look up data in Redis and Minio.

    **(Optional) Redis connectivity arguments**

    :param redis_enabled: bool - toggle for auto-caching all datasets
        in Redis (default is ``True``)
    :param redis_address: Redis connection string format is ``host:port``
        (default is ``localhost:6379``)
    :param redis_db: Redis db to use (default is ``0``)
    :param redis_password: optional - Redis password (default is ``None``)
    :param redis_expire: optional - Redis expire value (default is ``None``)
    :param redis_key: optional - redis key not used (default is ``None``)

    **(Optional) Minio (S3) connectivity arguments**

    :param s3_enabled: bool - toggle for turning on/off Minio or AWS S3
        (default is ``True``)
    :param s3_address: Minio S3 connection string address format is
        ``host:port`` (default is ``localhost:9000``)
    :param s3_bucket: S3 Bucket for storing the artifacts
        (default is ``dev``) which should be viewable on a browser:
        http://localhost:9000/minio/dev/
    :param s3_access_key: S3 Access key (default is ``trexaccesskey``)
    :param s3_secret_key: S3 Secret key (default is ``trex123321``)
    :param s3_region_name: S3 region name (default is ``us-east-1``)
    :param s3_secure: Transmit using tls encryption (default is ``False``)
    :param s3_key: optional s3 key not used (default is ``None``)

    **Debugging**

    :param log_label: optional - log label string
    :param verbose: optional - flag for debugging (default to ``False``)
    """
    label = log_label
    if not label:
        label = 'build_bt'

    if not date:
        date = ae_utils.get_last_close_str()

    td_convert_to_datetime = ae_consts.TRADIER_CONVERT_TO_DATETIME

    date_key = f'{ticker}_{date}'

    base_req = api_requests.get_ds_dict(
        ticker=ticker,
        base_key=date_key,
        ds_id=label,
        service_dict=service_dict)

    if not service_dict:
        base_req['redis_enabled'] = redis_enabled
        base_req['redis_address'] = redis_address
        base_req['redis_password'] = redis_password
        base_req['redis_db'] = redis_db
        base_req['redis_key'] = date_key
        base_req['redis_expire'] = redis_expire
        base_req['s3_enabled'] = s3_enabled
        base_req['s3_bucket'] = s3_bucket
        base_req['s3_address'] = s3_address
        base_req['s3_secure'] = s3_secure
        base_req['s3_region_name'] = s3_region_name
        base_req['s3_access_key'] = s3_access_key
        base_req['s3_secret_key'] = s3_secret_key
        base_req['s3_key'] = date_key

    if verbose:
        log.info(f'extracting {date_key}')

    """
    for showing connectivity args in the logs:

    log.debug(f'bt {date_key} {ae_consts.ppj(base_req)}')
    """

    iex_daily_status = ae_consts.FAILED
    iex_minute_status = ae_consts.FAILED
    iex_quote_status = ae_consts.FAILED
    iex_stats_status = ae_consts.FAILED
    iex_peers_status = ae_consts.FAILED
    iex_news_status = ae_consts.FAILED
    iex_financials_status = ae_consts.FAILED
    iex_earnings_status = ae_consts.FAILED
    iex_dividends_status = ae_consts.FAILED
    iex_company_status = ae_consts.FAILED
    td_calls_status = ae_consts.FAILED
    td_puts_status = ae_consts.FAILED

    iex_daily_df = None
    iex_minute_df = None
    iex_quote_df = None
    iex_stats_df = None
    iex_peers_df = None
    iex_news_df = None
    iex_financials_df = None
    iex_earnings_df = None
    iex_dividends_df = None
    iex_company_df = None
    td_calls_df = None
    td_puts_df = None

    if 'daily' in datasets:
        iex_daily_status, iex_daily_df = \
            iex_extract_utils.extract_daily_dataset(
                base_req)
        if iex_daily_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_daily={ticker}')
    if 'minute' in datasets:
        iex_minute_status, iex_minute_df = \
            iex_extract_utils.extract_minute_dataset(
                base_req)
        if iex_minute_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_minute={ticker}')
    if 'quote' in datasets:
        iex_quote_status, iex_quote_df = \
            iex_extract_utils.extract_quote_dataset(
                base_req)
        if iex_quote_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_quote={ticker}')
    if 'stats' in datasets:
        iex_stats_status, iex_stats_df = \
            iex_extract_utils.extract_stats_dataset(
                base_req)
        if iex_stats_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_stats={ticker}')
    if 'peers' in datasets:
        iex_peers_status, iex_peers_df = \
            iex_extract_utils.extract_peers_dataset(
                base_req)
        if iex_peers_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_peers={ticker}')
    if 'news' in datasets:
        iex_news_status, iex_news_df = \
            iex_extract_utils.extract_news_dataset(
                base_req)
        if iex_news_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_news={ticker}')
    if 'financials' in datasets:
        iex_financials_status, iex_financials_df = \
            iex_extract_utils.extract_financials_dataset(
                base_req)
        if iex_financials_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_financials={ticker}')
    if 'earnings' in datasets:
        iex_earnings_status, iex_earnings_df = \
            iex_extract_utils.extract_earnings_dataset(
                base_req)
        if iex_earnings_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_earnings={ticker}')
    if 'dividends' in datasets:
        iex_dividends_status, iex_dividends_df = \
            iex_extract_utils.extract_dividends_dataset(
                base_req)
        if iex_dividends_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_dividends={ticker}')
    if 'company' in datasets:
        iex_company_status, iex_company_df = \
            iex_extract_utils.extract_company_dataset(
                base_req)
        if iex_company_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_company={ticker}')
    # end of iex extracts

    """
    Yahoo disabled on Jan 2019

    yahoo_news_status = ae_consts.FAILED
    yahoo_options_status = ae_consts.FAILED
    yahoo_pricing_status = ae_consts.FAILED
    yahoo_option_calls_df = None
    yahoo_option_puts_df = None
    yahoo_pricing_df = None
    yahoo_news_df = None
    if 'options' in datasets:
        yahoo_options_status, yahoo_option_calls_df = \
            yahoo_extract_utils.extract_option_calls_dataset(
                base_req)
        yahoo_options_status, yahoo_option_puts_df = \
            yahoo_extract_utils.extract_option_puts_dataset(
                base_req)
        if yahoo_options_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract yahoo_options={ticker}')
    if 'pricing' in datasets:
        yahoo_pricing_status, yahoo_pricing_df = \
            yahoo_extract_utils.extract_pricing_dataset(
                base_req)
        if yahoo_pricing_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract yahoo_pricing={ticker}')
    if 'news' in datasets:
        yahoo_news_status, yahoo_news_df = \
            yahoo_extract_utils.extract_yahoo_news_dataset(
                base_req)
        if yahoo_news_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract yahoo_news={ticker}')
    # end of yahoo extracts
    """

    """
    Tradier Extraction

    Debug by setting:

    base_req['verbose_td'] = True
    """
    if ('calls' in datasets or 'tdcalls' in datasets):
        td_calls_status, td_calls_df = \
            td_extract_utils.extract_option_calls_dataset(
                base_req)
        if td_calls_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract tdcalls={ticker}')
        else:
            if ae_consts.is_df(df=td_calls_df):
                for c in td_convert_to_datetime:
                    if c in td_calls_df:
                        td_calls_df[c] = pd.to_datetime(
                            td_calls_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT)
                if 'date' in td_calls_df:
                    td_calls_df = td_calls_df.sort_values(
                        'date', ascending=True)
            # end of converting dates
    # end of Tradier calls extraction

    if ('puts' in datasets or 'tdputs' in datasets):
        td_puts_status, td_puts_df = \
            td_extract_utils.extract_option_puts_dataset(
                base_req)
        if td_puts_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract tdputs={ticker}')
        else:
            if ae_consts.is_df(df=td_puts_df):
                for c in td_convert_to_datetime:
                    if c in td_puts_df:
                        td_puts_df[c] = pd.to_datetime(
                            td_puts_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT)
                if 'date' in td_puts_df:
                    td_puts_df = td_puts_df.sort_values(
                        'date', ascending=True)
            # end of converting dates
    # end of Tradier puts extraction

    ticker_data = {
        'daily': iex_daily_df,
        'minute': iex_minute_df,
        'quote': iex_quote_df,
        'stats': iex_stats_df,
        'peers': iex_peers_df,
        'news1': iex_news_df,
        'financials': iex_financials_df,
        'earnings': iex_earnings_df,
        'dividends': iex_dividends_df,
        'company': iex_company_df,
        'tdcalls': td_calls_df,
        'tdputs': td_puts_df,
        'calls': None,  # yahoo - here for legacy
        'news': None,  # yahoo - here for legacy
        'pricing': None,  # yahoo - here for legacy
        'puts': None  # yahoo - here for legacy
    }

    return ticker_data
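# Hedged usage sketch for build_dataset_node(): read one day of cached
# minute bars plus Tradier calls for SPY. The date value is illustrative
# and must already exist in redis for the lookup to succeed.
node = build_dataset_node(
    ticker='SPY',
    datasets=['minute', 'tdcalls'],
    date='2019-02-01')
minute_df = node['minute']     # None when the key was not cached
td_calls_df = node['tdcalls']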
def extract(
        ticker=None,
        tickers=None,
        use_key=None,
        extract_mode='all',
        iex_datasets=None,
        redis_enabled=True,
        redis_address=None,
        redis_db=None,
        redis_password=None,
        redis_expire=None,
        s3_enabled=True,
        s3_address=None,
        s3_bucket=None,
        s3_access_key=None,
        s3_secret_key=None,
        s3_region_name=None,
        s3_secure=False,
        celery_disabled=True,
        broker_url=None,
        result_backend=None,
        label=None,
        verbose=False):
    """extract

    Extract all cached datasets for a stock ``ticker`` or a list of
    ``tickers`` and return a dictionary. Please make sure the datasets
    are already cached in Redis before running this method. If they are
    not, please refer to the ``analysis_engine.fetch.fetch`` function
    to prepare the datasets on your environment.

    Python example:

    .. code-block:: python

        from analysis_engine.extract import extract
        d = extract(ticker='NFLX')
        print(d)
        for k in d['NFLX']:
            print('dataset key: {}'.format(k))

    This was created to reduce the amount of typing in Jupyter
    notebooks. It can also be set up for use with a distributed engine
    using the optional arguments, depending on your connectivity
    requirements.

    .. note:: Please ensure Redis and Minio are running
        before trying to extract tickers

    **Stock tickers to extract**

    :param ticker: single stock ticker/symbol/ETF to extract
    :param tickers: optional - list of tickers to extract
    :param use_key: optional - extract historical key from Redis
        usually formatted ``<TICKER>_<date formatted YYYY-MM-DD>``

    **(Optional) Data sources, datafeeds and datasets to gather**

    :param iex_datasets: list of strings for gathering specific `IEX
        datasets <https://iextrading.com/developer/docs/#stocks>`__
        which are set as consts: ``analysis_engine.iex.consts.FETCH_*``.

    **(Optional) Redis connectivity arguments**

    :param redis_enabled: bool - toggle for auto-caching all datasets
        in Redis (default is ``True``)
    :param redis_address: Redis connection string format: ``host:port``
        (default is ``localhost:6379``)
    :param redis_db: Redis db to use (default is ``0``)
    :param redis_password: optional - Redis password (default is ``None``)
    :param redis_expire: optional - Redis expire value (default is ``None``)

    **(Optional) Minio (S3) connectivity arguments**

    :param s3_enabled: bool - toggle for auto-archiving on Minio (S3)
        (default is ``True``)
    :param s3_address: Minio S3 connection string format: ``host:port``
        (default is ``localhost:9000``)
    :param s3_bucket: S3 Bucket for storing the artifacts
        (default is ``dev``) which should be viewable on a browser:
        http://localhost:9000/minio/dev/
    :param s3_access_key: S3 Access key (default is ``trexaccesskey``)
    :param s3_secret_key: S3 Secret key (default is ``trex123321``)
    :param s3_region_name: S3 region name (default is ``us-east-1``)
    :param s3_secure: Transmit using tls encryption (default is ``False``)

    **(Optional) Celery worker broker connectivity arguments**

    :param celery_disabled: bool - toggle synchronous mode or publish
        to an engine connected to the `Celery broker and backend
        <https://github.com/celery/celery#transports-and-backends>`__
        (default is ``True`` - synchronous mode without an engine
        or need for a broker or backend for Celery)
    :param broker_url: Celery broker url
        (default is ``redis://0.0.0.0:6379/13``)
    :param result_backend: Celery backend url
        (default is ``redis://0.0.0.0:6379/14``)
    :param label: tracking log label

    **(Optional) Debugging**

    :param verbose: bool - show extract warnings
        and other debug logging (default is False)

    **Supported environment variables**

    ::

        export REDIS_ADDRESS="localhost:6379"
        export REDIS_DB="0"
        export S3_ADDRESS="localhost:9000"
        export S3_BUCKET="dev"
        export AWS_ACCESS_KEY_ID="trexaccesskey"
        export AWS_SECRET_ACCESS_KEY="trex123321"
        export AWS_DEFAULT_REGION="us-east-1"
        export S3_SECURE="0"
        export WORKER_BROKER_URL="redis://0.0.0.0:6379/13"
        export WORKER_BACKEND_URL="redis://0.0.0.0:6379/14"
    """
    rec = {}
    extract_requests = []

    use_tickers = tickers
    if ticker:
        use_tickers = [ticker]
    else:
        if not use_tickers:
            use_tickers = []

    default_iex_datasets = [
        'daily',
        'minute',
        'quote',
        'stats',
        'peers',
        'news',
        'financials',
        'earnings',
        'dividends',
        'company'
    ]

    if not iex_datasets:
        iex_datasets = default_iex_datasets

    if redis_enabled:
        if not redis_address:
            redis_address = os.getenv('REDIS_ADDRESS', 'localhost:6379')
        if not redis_password:
            redis_password = os.getenv('REDIS_PASSWORD', None)
        if not redis_db:
            redis_db = int(os.getenv('REDIS_DB', '0'))
        if not redis_expire:
            redis_expire = os.getenv('REDIS_EXPIRE', None)
    if s3_enabled:
        if not s3_address:
            s3_address = os.getenv('S3_ADDRESS', 'localhost:9000')
        if not s3_access_key:
            s3_access_key = os.getenv('AWS_ACCESS_KEY_ID', 'trexaccesskey')
        if not s3_secret_key:
            s3_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'trex123321')
        if not s3_region_name:
            s3_region_name = os.getenv('AWS_DEFAULT_REGION', 'us-east-1')
        if not s3_secure:
            s3_secure = os.getenv('S3_SECURE', '0') == '1'
        if not s3_bucket:
            s3_bucket = os.getenv('S3_BUCKET', 'dev')
    if not broker_url:
        broker_url = os.getenv(
            'WORKER_BROKER_URL', 'redis://0.0.0.0:6379/13')
    if not result_backend:
        result_backend = os.getenv(
            'WORKER_BACKEND_URL', 'redis://0.0.0.0:6379/14')

    if not label:
        label = 'get-latest'

    num_tickers = len(use_tickers)
    last_close_str = ae_utils.get_last_close_str()

    if iex_datasets:
        log.info(
            '{} - getting latest for tickers={} iex={}'.format(
                label, num_tickers, json.dumps(iex_datasets)))
    else:
        log.info(
            '{} - getting latest for tickers={}'.format(
                label, num_tickers))

    ticker_key = use_key
    if not ticker_key:
        ticker_key = '{}_{}'.format(ticker, last_close_str)

    common_vals = {}
    common_vals['base_key'] = ticker_key
    common_vals['celery_disabled'] = celery_disabled
    common_vals['ticker'] = ticker
    common_vals['label'] = label
    common_vals['iex_datasets'] = iex_datasets
    common_vals['s3_enabled'] = s3_enabled
    common_vals['s3_bucket'] = s3_bucket
    common_vals['s3_address'] = s3_address
    common_vals['s3_secure'] = s3_secure
    common_vals['s3_region_name'] = s3_region_name
    common_vals['s3_access_key'] = s3_access_key
    common_vals['s3_secret_key'] = s3_secret_key
    common_vals['s3_key'] = ticker_key
    common_vals['redis_enabled'] = redis_enabled
    common_vals['redis_address'] = redis_address
    common_vals['redis_password'] = redis_password
    common_vals['redis_db'] = redis_db
    common_vals['redis_key'] = ticker_key
    common_vals['redis_expire'] = redis_expire

    log.info(
        '{} - extract ticker={} last_close={} base_key={} '
        'redis_address={} s3_address={}'.format(
            label,
            ticker,
            last_close_str,
            common_vals['base_key'],
            common_vals['redis_address'],
            common_vals['s3_address']))

    """
    Extract Datasets
    """

    iex_daily_status = ae_consts.FAILED
    iex_minute_status = ae_consts.FAILED
    iex_quote_status = ae_consts.FAILED
    iex_stats_status = ae_consts.FAILED
    iex_peers_status = ae_consts.FAILED
    iex_news_status = ae_consts.FAILED
    iex_financials_status = ae_consts.FAILED
    iex_earnings_status = ae_consts.FAILED
    iex_dividends_status = ae_consts.FAILED
    iex_company_status = ae_consts.FAILED
    yahoo_news_status = ae_consts.FAILED
    yahoo_options_status = ae_consts.FAILED
    yahoo_pricing_status = ae_consts.FAILED
    td_calls_status = ae_consts.FAILED
    td_puts_status = ae_consts.FAILED

    iex_daily_df = None
    iex_minute_df = None
    iex_quote_df = None
    iex_stats_df = None
    iex_peers_df = None
    iex_news_df = None
    iex_financials_df = None
    iex_earnings_df = None
    iex_dividends_df = None
    iex_company_df = None
    yahoo_option_calls_df = None
    yahoo_option_puts_df = None
    yahoo_pricing_df = None
    yahoo_news_df = None
    td_calls_df = None
    td_puts_df = None

    for ticker in use_tickers:
        req = api_requests.get_ds_dict(
            ticker=ticker,
            base_key=common_vals['base_key'],
            ds_id=label,
            service_dict=common_vals)
        extract_requests.append(req)
    # end of for all ticker in use_tickers

    extract_iex = True
    if extract_mode not in ['all', 'iex']:
        extract_iex = False

    extract_yahoo = True
    if extract_mode not in ['all', 'yahoo']:
        extract_yahoo = False

    extract_td = True
    if extract_mode not in ['all', 'td']:
        extract_td = False

    for extract_req in extract_requests:
        if 'daily' in iex_datasets or extract_iex:
            iex_daily_status, iex_daily_df = \
                iex_extract_utils.extract_daily_dataset(
                    extract_req)
            if iex_daily_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_daily={}'.format(ticker))
        if 'minute' in iex_datasets or extract_iex:
            iex_minute_status, iex_minute_df = \
                iex_extract_utils.extract_minute_dataset(
                    extract_req)
            if iex_minute_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_minute={}'.format(ticker))
        if 'quote' in iex_datasets or extract_iex:
            iex_quote_status, iex_quote_df = \
                iex_extract_utils.extract_quote_dataset(
                    extract_req)
            if iex_quote_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_quote={}'.format(ticker))
        if 'stats' in iex_datasets or extract_iex:
            iex_stats_status, iex_stats_df = \
                iex_extract_utils.extract_stats_dataset(
                    extract_req)
            if iex_stats_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_stats={}'.format(ticker))
        if 'peers' in iex_datasets or extract_iex:
            iex_peers_status, iex_peers_df = \
                iex_extract_utils.extract_peers_dataset(
                    extract_req)
            if iex_peers_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_peers={}'.format(ticker))
        if 'news' in iex_datasets or extract_iex:
            iex_news_status, iex_news_df = \
                iex_extract_utils.extract_news_dataset(
                    extract_req)
            if iex_news_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_news={}'.format(ticker))
        if 'financials' in iex_datasets or extract_iex:
            iex_financials_status, iex_financials_df = \
                iex_extract_utils.extract_financials_dataset(
                    extract_req)
            if iex_financials_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_financials={}'.format(ticker))
        if 'earnings' in iex_datasets or extract_iex:
            iex_earnings_status, iex_earnings_df = \
                iex_extract_utils.extract_earnings_dataset(
                    extract_req)
            if iex_earnings_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_earnings={}'.format(ticker))
        if 'dividends' in iex_datasets or extract_iex:
            iex_dividends_status, iex_dividends_df = \
                iex_extract_utils.extract_dividends_dataset(
                    extract_req)
            if iex_dividends_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_dividends={}'.format(ticker))
        if 'company' in iex_datasets or extract_iex:
            iex_company_status, iex_company_df = \
                iex_extract_utils.extract_company_dataset(
                    extract_req)
            if iex_company_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_company={}'.format(ticker))
        # end of iex extracts

        if extract_yahoo:
            yahoo_options_status, yahoo_option_calls_df = \
                yahoo_extract_utils.extract_option_calls_dataset(
                    extract_req)
            yahoo_options_status, yahoo_option_puts_df = \
                yahoo_extract_utils.extract_option_puts_dataset(
                    extract_req)
            if yahoo_options_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract yahoo_options={}'.format(ticker))
            yahoo_pricing_status, yahoo_pricing_df = \
                yahoo_extract_utils.extract_pricing_dataset(
                    extract_req)
            if yahoo_pricing_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract yahoo_pricing={}'.format(ticker))
            yahoo_news_status, yahoo_news_df = \
                yahoo_extract_utils.extract_yahoo_news_dataset(
                    extract_req)
            if yahoo_news_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract yahoo_news={}'.format(ticker))
        # end of yahoo extracts

        if extract_td:
            td_calls_status, td_calls_df = \
                td_extract_utils.extract_option_calls_dataset(
                    extract_req)
            if td_calls_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract tdcalls={}'.format(ticker))
            td_puts_status, td_puts_df = \
                td_extract_utils.extract_option_puts_dataset(
                    extract_req)
            if td_puts_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract tdputs={}'.format(ticker))
        # end of td extracts

        ticker_data = {}
        ticker_data['daily'] = iex_daily_df
        ticker_data['minute'] = iex_minute_df
        ticker_data['quote'] = iex_quote_df
        ticker_data['stats'] = iex_stats_df
        ticker_data['peers'] = iex_peers_df
        ticker_data['news1'] = iex_news_df
        ticker_data['financials'] = iex_financials_df
        ticker_data['earnings'] = iex_earnings_df
        ticker_data['dividends'] = iex_dividends_df
        ticker_data['company'] = iex_company_df
        ticker_data['calls'] = yahoo_option_calls_df
        ticker_data['puts'] = yahoo_option_puts_df
        ticker_data['pricing'] = yahoo_pricing_df
        ticker_data['news'] = yahoo_news_df
        ticker_data['tdcalls'] = td_calls_df
        ticker_data['tdputs'] = td_puts_df

        rec[ticker] = ticker_data
    # end of for extract_req in extract_requests

    return rec
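# Hedged usage sketch for extract(): read back the latest cached datasets
# for one ticker while limiting extraction to IEX; with extract_mode='iex'
# the Yahoo and Tradier lookups above are skipped.
from analysis_engine.extract import extract

d = extract(ticker='NFLX', extract_mode='iex')
quote_df = d['NFLX']['quote']  # None when the quote dataset is not cached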
def latest(self,
           date_str=None,
           start_row=-200,
           extract_iex=True,
           extract_yahoo=False,
           extract_td=True,
           verbose=False,
           **kwargs):
    """latest

    Run the algorithm with the latest pricing data. Also
    supports running a backtest for a historical date in
    the pricing history (format ``YYYY-MM-DD``)

    :param date_str: optional - string start date ``YYYY-MM-DD``
        (default is the latest close date)
    :param start_row: negative number of rows back from the end
        of the data; the default ``-200`` means the algorithm
        will process the latest 200 rows in the minute dataset
    :param extract_iex: bool flag for extracting from ``IEX``
    :param extract_yahoo: bool flag for extracting from ``Yahoo``
        which is disabled as of 1/2019
    :param extract_td: bool flag for extracting from ``Tradier``
    :param verbose: bool flag for logs
    :param kwargs: keyword arg dict
    """
    use_date_str = date_str
    if not use_date_str:
        use_date_str = ae_utils.get_last_close_str()

    log.info('creating algo')
    self.algo_obj = base_algo.BaseAlgo(
        ticker=self.config_dict['ticker'],
        balance=self.config_dict['balance'],
        commission=self.config_dict['commission'],
        name=self.use_name,
        start_date=self.use_start_date,
        end_date=self.use_end_date,
        auto_fill=self.auto_fill,
        config_dict=self.config_dict,
        load_from_s3_bucket=self.load_from_s3_bucket,
        load_from_s3_key=self.load_from_s3_key,
        load_from_redis_key=self.load_from_redis_key,
        load_from_file=self.load_from_file,
        load_compress=self.load_compress,
        load_publish=self.load_publish,
        load_config=self.load_config,
        report_redis_key=self.report_redis_key,
        report_s3_bucket=self.report_s3_bucket,
        report_s3_key=self.report_s3_key,
        report_file=self.report_file,
        report_compress=self.report_compress,
        report_publish=self.report_publish,
        report_config=self.report_config,
        history_redis_key=self.history_redis_key,
        history_s3_bucket=self.history_s3_bucket,
        history_s3_key=self.history_s3_key,
        history_file=self.history_file,
        history_compress=self.history_compress,
        history_publish=self.history_publish,
        history_config=self.history_config,
        extract_redis_key=self.extract_redis_key,
        extract_s3_bucket=self.extract_s3_bucket,
        extract_s3_key=self.extract_s3_key,
        extract_file=self.extract_file,
        extract_save_dir=self.extract_save_dir,
        extract_compress=self.extract_compress,
        extract_publish=self.extract_publish,
        extract_config=self.extract_config,
        publish_to_slack=self.publish_to_slack,
        publish_to_s3=self.publish_to_s3,
        publish_to_redis=self.publish_to_redis,
        dataset_type=self.dataset_type,
        serialize_datasets=self.serialize_datasets,
        compress=self.compress,
        encoding=self.encoding,
        redis_enabled=self.redis_enabled,
        redis_key=self.redis_key,
        redis_address=self.redis_address,
        redis_db=self.redis_db,
        redis_password=self.redis_password,
        redis_expire=self.redis_expire,
        redis_serializer=self.redis_serializer,
        redis_encoding=self.redis_encoding,
        s3_enabled=self.s3_enabled,
        s3_key=self.s3_key,
        s3_address=self.s3_address,
        s3_bucket=self.s3_bucket,
        s3_access_key=self.s3_access_key,
        s3_secret_key=self.s3_secret_key,
        s3_region_name=self.s3_region_name,
        s3_secure=self.s3_secure,
        slack_enabled=self.slack_enabled,
        slack_code_block=self.slack_code_block,
        slack_full_width=self.slack_full_width,
        dataset_publish_extract=self.extract_publish,
        dataset_publish_history=self.history_publish,
        dataset_publish_report=self.report_publish,
        run_on_engine=self.run_on_engine,
        auth_url=self.broker_url,
        backend_url=self.backend_url,
        include_tasks=self.include_tasks,
        ssl_options=self.ssl_options,
        transport_options=self.transport_options,
        path_to_config_module=self.path_to_config_module,
        timeseries=self.timeseries,
        trade_strategy=self.trade_strategy,
        verbose=False,
        raise_on_err=self.raise_on_err)

    log.info('run latest - start')

    ticker = self.config_dict['ticker']
    self.common_fetch_vals['base_key'] = f'{ticker}_{use_date_str}'
    extract_req = api_requests.get_ds_dict(
        ticker=ticker,
        base_key=self.common_fetch_vals['base_key'],
        ds_id=ticker,
        service_dict=self.common_fetch_vals)
    node_date_key = use_date_str.replace(f'{ticker}_', '')
    req = {
        'id': use_date_str,
        'ticker': ticker,
        'date_key': self.common_fetch_vals['base_key'],
        'date': node_date_key,
        'req': extract_req
    }

    # fetch
    iex_daily_df = None
    iex_minute_df = None
    iex_quote_df = None
    iex_stats_df = None
    iex_peers_df = None
    iex_news_df = None
    iex_financials_df = None
    iex_earnings_df = None
    iex_dividends_df = None
    iex_company_df = None
    yahoo_option_calls_df = None
    yahoo_option_puts_df = None
    yahoo_pricing_df = None
    yahoo_news_df = None
    td_calls_df = None
    td_puts_df = None

    node_date_key = req['date']
    dataset_node_id = req['id']
    dataset_id = dataset_node_id
    label = (
        f'ticker={ticker} '
        f'date={node_date_key}')
    if verbose:
        log.info(f'{label} - extract - start')
    if 'daily' in self.iex_datasets or extract_iex:
        iex_daily_status, iex_daily_df = \
            iex_extract_utils.extract_daily_dataset(
                extract_req)
        if iex_daily_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_daily={ticker}')
    if 'minute' in self.iex_datasets or extract_iex:
        iex_minute_status, iex_minute_df = \
            iex_extract_utils.extract_minute_dataset(
                extract_req)
        if iex_minute_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_minute={ticker}')
    if 'quote' in self.iex_datasets or extract_iex:
        iex_quote_status, iex_quote_df = \
            iex_extract_utils.extract_quote_dataset(
                extract_req)
        if iex_quote_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_quote={ticker}')
    if 'stats' in self.iex_datasets or extract_iex:
        iex_stats_status, iex_stats_df = \
            iex_extract_utils.extract_stats_dataset(
                extract_req)
        if iex_stats_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_stats={ticker}')
    if 'peers' in self.iex_datasets or extract_iex:
        iex_peers_status, iex_peers_df = \
            iex_extract_utils.extract_peers_dataset(
                extract_req)
        if iex_peers_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_peers={ticker}')
    if 'news' in self.iex_datasets or extract_iex:
        iex_news_status, iex_news_df = \
            iex_extract_utils.extract_news_dataset(
                extract_req)
        if iex_news_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_news={ticker}')
    if 'financials' in self.iex_datasets or extract_iex:
        iex_financials_status, iex_financials_df = \
            iex_extract_utils.extract_financials_dataset(
                extract_req)
        if iex_financials_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_financials={ticker}')
    if 'earnings' in self.iex_datasets or extract_iex:
        iex_earnings_status, iex_earnings_df = \
            iex_extract_utils.extract_earnings_dataset(
                extract_req)
        if iex_earnings_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_earnings={ticker}')
    if 'dividends' in self.iex_datasets or extract_iex:
        iex_dividends_status, iex_dividends_df = \
            iex_extract_utils.extract_dividends_dataset(
                extract_req)
        if iex_dividends_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_dividends={ticker}')
    if 'company' in self.iex_datasets or extract_iex:
        iex_company_status, iex_company_df = \
            iex_extract_utils.extract_company_dataset(
                extract_req)
        if iex_company_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract iex_company={ticker}')
    # end of iex extracts

    if extract_yahoo:
        # fixed: check each options extract separately so a calls
        # failure is not masked by the puts status
        yahoo_calls_status, yahoo_option_calls_df = \
            yahoo_extract_utils.extract_option_calls_dataset(
                extract_req)
        if yahoo_calls_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract yahoo_option_calls={ticker}')
        yahoo_puts_status, yahoo_option_puts_df = \
            yahoo_extract_utils.extract_option_puts_dataset(
                extract_req)
        if yahoo_puts_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract yahoo_option_puts={ticker}')
        yahoo_pricing_status, yahoo_pricing_df = \
            yahoo_extract_utils.extract_pricing_dataset(
                extract_req)
        if yahoo_pricing_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract yahoo_pricing={ticker}')
        yahoo_news_status, yahoo_news_df = \
            yahoo_extract_utils.extract_yahoo_news_dataset(
                extract_req)
        if yahoo_news_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract yahoo_news={ticker}')
    # end of yahoo extracts

    if extract_td:
        """
        Debug by setting:

        extract_req['verbose_td'] = True
        """
        convert_to_datetime = [
            'date',
            'created',
            'ask_date',
            'bid_date',
            'trade_date'
        ]
        td_calls_status, td_calls_df = \
            td_extract_utils.extract_option_calls_dataset(
                extract_req)
        if td_calls_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract tdcalls={ticker}')
        else:
            if ae_consts.is_df(df=td_calls_df):
                for c in convert_to_datetime:
                    if c in td_calls_df:
                        td_calls_df[c] = pd.to_datetime(
                            td_calls_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT)
                if 'date' in td_calls_df:
                    # fixed: sort_values returns a new frame, so the
                    # result must be assigned back
                    td_calls_df = td_calls_df.sort_values(
                        'date',
                        ascending=True)
            # end of converting dates
        td_puts_status, td_puts_df = \
            td_extract_utils.extract_option_puts_dataset(
                extract_req)
        if td_puts_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract tdputs={ticker}')
        else:
            if ae_consts.is_df(df=td_puts_df):
                for c in convert_to_datetime:
                    if c in td_puts_df:
                        td_puts_df[c] = pd.to_datetime(
                            td_puts_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT)
                if 'date' in td_puts_df:
                    # fixed: assign the sorted frame back
                    td_puts_df = td_puts_df.sort_values(
                        'date',
                        ascending=True)
            # end of converting dates
    # td extracts

    # map extracted data to DEFAULT_SERIALIZED_DATASETS
    ticker_data = {}
    ticker_data['daily'] = iex_daily_df
    ticker_data['minute'] = iex_minute_df
    ticker_data['quote'] = iex_quote_df
    ticker_data['stats'] = iex_stats_df
    ticker_data['peers'] = iex_peers_df
    ticker_data['news1'] = iex_news_df
    ticker_data['financials'] = iex_financials_df
    ticker_data['earnings'] = iex_earnings_df
    ticker_data['dividends'] = iex_dividends_df
    ticker_data['company'] = iex_company_df
    ticker_data['calls'] = yahoo_option_calls_df
    ticker_data['puts'] = yahoo_option_puts_df
    ticker_data['pricing'] = yahoo_pricing_df
    ticker_data['news'] = yahoo_news_df
    ticker_data['tdcalls'] = td_calls_df
    ticker_data['tdputs'] = td_puts_df

    algo_data_req = {
        ticker: [{
            'id': dataset_id,  # id is currently the cache key in redis
            'date': use_date_str,  # used to confirm dates in asc order
            'data': ticker_data,
            'start_row': start_row
        }]
    }

    if verbose:
        log.info(
            f'extract - {label} '
            f'dataset={len(algo_data_req[ticker])}')

    # this could be a separate celery task
    try:
        if verbose:
            log.info(
                f'handle_data START - {label} from '
                f'{node_date_key}')
        self.algo_obj.handle_data(data=algo_data_req)
        if verbose:
            log.info(
                f'handle_data END - {label} from '
                f'{node_date_key}')
    except Exception as e:
        a_name = self.algo_obj.get_name()
        a_debug_msg = self.algo_obj.get_debug_msg()
        if not a_debug_msg:
            a_debug_msg = 'debug message not set'
        # a_config_dict = ae_consts.ppj(self.algo_obj.config_dict)
        msg = (
            f'{label} - algo={a_name} '
            f'encountered exception in handle_data tickers={ticker} '
            f'from {node_date_key} ex={e} '
            f'and failed during operation: {a_debug_msg}')
        log.critical(msg)
    # end of try/ex

    log.info('run latest - create history')
    history_ds = self.algo_obj.create_history_dataset()
    self.history_df = pd.DataFrame(history_ds[ticker])
    self.determine_latest_times_in_history()

    self.num_rows = len(self.history_df.index)

    if verbose:
        log.info(self.history_df[['minute', 'close']].tail(5))

    log.info(
        f'run latest minute={self.end_date} - '
        f'rows={self.num_rows} - done')

    return self.get_history()
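# Usage sketch for ``latest`` above. Hedged example: the runner class
# name ``AlgoRunner`` and the return value are assumptions inferred from
# the constructor call and ``get_history()`` above, not a confirmed
# public API; the config keys match those read from self.config_dict:
#
#     runner = AlgoRunner(config_dict={
#         'ticker': 'NFLX',
#         'balance': 10000.0,
#         'commission': 6.0})
#     history_df = runner.latest()
#     print(history_df[['minute', 'close']].tail(5))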