def perform_extract(
        df_type,
        df_str,
        work_dict,
        dataset_id_key='ticker',
        scrub_mode='sort-by-date'):
    """perform_extract

    Helper for extracting from Redis or S3

    :param df_type: datafeed type enum
    :param df_str: dataset string name
    :param work_dict: incoming work request dictionary
    :param dataset_id_key: configurable dataset identifier key for
        tracking scrubbing and debugging errors
    :param scrub_mode: scrubbing mode on extraction for one-off cleanup
        before analysis
    :return: tuple of ``(status, scrubbed dataframe or None)``
    """
    # assume failure until both the extract and the scrub succeed
    status = FAILED
    ds_id = work_dict.get(dataset_id_key, None)
    label = work_dict.get('label', 'extract')
    # connectivity settings fall back to module-level defaults when
    # not provided in the work request
    s3_bucket = work_dict.get('s3_bucket', S3_BUCKET)
    s3_key = work_dict.get('s3_key', S3_KEY)
    redis_key = work_dict.get('redis_key', REDIS_KEY)
    s3_enabled = work_dict.get('s3_enabled', ENABLED_S3_UPLOAD)
    s3_access_key = work_dict.get('s3_access_key', S3_ACCESS_KEY)
    s3_secret_key = work_dict.get('s3_secret_key', S3_SECRET_KEY)
    s3_region_name = work_dict.get('s3_region_name', S3_REGION_NAME)
    s3_address = work_dict.get('s3_address', S3_ADDRESS)
    s3_secure = work_dict.get('s3_secure', S3_SECURE)
    redis_address = work_dict.get('redis_address', REDIS_ADDRESS)
    redis_password = work_dict.get('redis_password', REDIS_PASSWORD)
    redis_db = work_dict.get('redis_db', REDIS_DB)
    redis_expire = work_dict.get('redis_expire', REDIS_EXPIRE)

    log.debug('{} - {} - START - ds_id={} scrub_mode={} '
              'redis_address={}@{} redis_key={} '
              's3={} s3_address={} s3_bucket={} s3_key={}'.format(
                  label,
                  df_str,
                  ds_id,
                  scrub_mode,
                  redis_address,
                  redis_db,
                  redis_key,
                  s3_enabled,
                  s3_address,
                  s3_bucket,
                  s3_key))

    # opt-in env-flag logging - these lines expose credentials,
    # so they are off unless explicitly enabled
    if ev('DEBUG_REDIS_EXTRACT', '0') == '1':
        log.info('{} - {} - ds_id={} redis '
                 'pw={} expire={}'.format(
                     label,
                     df_str,
                     ds_id,
                     redis_password,
                     redis_expire))
    if ev('DEBUG_S3_EXTRACT', '0') == '1':
        log.info('{} - {} - ds_id={} s3 '
                 'ak={} sk={} region={} secure={}'.format(
                     label,
                     df_str,
                     ds_id,
                     s3_access_key,
                     s3_secret_key,
                     s3_region_name,
                     s3_secure))

    extract_res = None
    try:
        extract_res = build_df.build_df_from_redis(
            label=label,
            address=redis_address,
            db=redis_db,
            key=redis_key)
    except Exception as e:
        extract_res = None
        log.error('{} - {} - ds_id={} failed extract from '
                  'redis={}@{} key={} ex={}'.format(
                      label,
                      df_str,
                      ds_id,
                      redis_address,
                      redis_db,
                      redis_key,
                      e))
    # end of try/ex extract from redis

    if not extract_res:
        return status, None

    # the rebuilt record must both report SUCCESS and flag the
    # dataframe as valid before it is worth scrubbing
    valid_df = (
        extract_res['status'] == SUCCESS
        and extract_res['rec']['valid_df'])

    if not valid_df:
        if ev('DEBUG_S3_EXTRACT', '0') == '1':
            log.error('{} - {} ds_id={} invalid df '
                      'status={} extract_res={}'.format(
                          label,
                          df_str,
                          ds_id,
                          get_status(status=extract_res['status']),
                          extract_res))
        return status, None

    extract_df = extract_res['rec']['data']

    log.debug('{} - {} ds_id={} extract scrub={}'.format(
        label,
        df_str,
        ds_id,
        scrub_mode))

    # one-off cleanup (e.g. sort-by-date) before handing back for analysis
    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=extract_df)

    status = SUCCESS

    return status, scrubbed_df
def extract_option_puts_dataset(work_dict, scrub_mode='sort-by-date'):
    """extract_option_puts_dataset

    Extract the TD options puts for a ticker and
    return it as a ``pandas.Dataframe``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    :return: tuple of ``(status, scrubbed dataframe or None)``
    """
    label = f'{work_dict.get("label", "extract")}'
    ds_id = work_dict.get('ticker')
    df_type = td_consts.DATAFEED_TD_PUTS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key', work_dict.get('tdputs', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key', work_dict.get('tdputs', 'missing-s3-key'))
    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)
    verbose = work_dict.get('verbose_td', False)

    if verbose:
        log.info(f'{label} - {df_str} - start - redis_key={redis_key} '
                 f's3_key={s3_key}')

    # fall back to the configured redis endpoint when the request
    # did not provide a host/port pair
    if not redis_host and not redis_port:
        redis_host = ae_consts.REDIS_ADDRESS.split(':')[0]
        redis_port = ae_consts.REDIS_ADDRESS.split(':')[1]

    exp_date_str = None
    puts_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key,
            decompress_df=True)
        status = redis_rec['status']
        if verbose:
            log.info(f'{label} - {df_str} redis get data key={redis_key} '
                     f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            # records may nest the puts under a 'puts' key or store
            # the dataframe json directly
            puts_json = None
            if 'puts' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['puts']
            else:
                puts_json = redis_rec['rec']['data']
            if verbose:
                log.info(f'{label} - {df_str} redis convert puts to df')
            try:
                puts_df = pd.read_json(puts_json, orient='records')
                if len(puts_df.index) == 0:
                    return ae_consts.SUCCESS, None
                if 'date' not in puts_df:
                    log.debug('failed to find date column in TD puts '
                              f'df={puts_df} len={len(puts_df.index)}')
                    return ae_consts.SUCCESS, None
                # fix: sort_values returns a new frame unless
                # inplace=True - the original call discarded the sorted
                # result, so exp_date was read off the unsorted frame
                puts_df = puts_df.sort_values(by=['date', 'strike'])
                exp_date_str = (puts_df['exp_date'].iloc[-1])
                puts_df['date'] = puts_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)
            except Exception:
                # expected when the cached json is missing or malformed
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no puts df found')
                return ae_consts.EMPTY, None
            # end of try/ex to convert to df
            if verbose:
                log.info(f'{label} - {df_str} redis_key={redis_key} '
                         f'puts={len(puts_df.index)} exp_date={exp_date_str}')
        else:
            if verbose:
                log.info(f'{label} - {df_str} did not find valid redis '
                         f'option puts in redis_key={redis_key} '
                         f'status={ae_consts.get_status(status=status)}')
    except Exception as e:
        log.debug(f'{label} - {df_str} - ds_id={ds_id} failed getting option '
                  f'puts from redis={redis_host}:{redis_port}@{redis_db} '
                  f'key={redis_key} ex={e}')
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
def extract_option_calls_dataset(
        work_dict,
        scrub_mode='sort-by-date'):
    """extract_option_calls_dataset

    Extract the TD options calls for a ticker and
    return it as a ``pandas.Dataframe``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    :return: tuple of ``(status, scrubbed dataframe or None)``
    """
    label = '{}'.format(work_dict.get('label', 'extract'))
    ds_id = work_dict.get('ticker')
    df_type = td_consts.DATAFEED_TD_CALLS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key',
        work_dict.get('tdcalls', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key',
        work_dict.get('tdcalls', 'missing-s3-key'))
    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)
    verbose = work_dict.get('verbose_td', False)

    if verbose:
        log.info(
            '{} - {} - start - redis_key={} s3_key={}'.format(
                label, df_str, redis_key, s3_key))

    # fall back to the configured redis endpoint when the request
    # did not provide a host/port pair
    if not redis_host and not redis_port:
        redis_host = ae_consts.REDIS_ADDRESS.split(':')[0]
        redis_port = ae_consts.REDIS_ADDRESS.split(':')[1]

    exp_date_str = None
    calls_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key,
            decompress_df=True)
        status = redis_rec['status']
        if verbose:
            log.info(
                '{} - {} redis get data key={} status={}'.format(
                    label,
                    df_str,
                    redis_key,
                    ae_consts.get_status(status=status)))

        if status == ae_consts.SUCCESS:
            # records may nest the calls under a 'calls' key or store
            # the dataframe json directly
            calls_json = None
            if 'calls' in redis_rec['rec']['data']:
                calls_json = redis_rec['rec']['data']['calls']
            else:
                calls_json = redis_rec['rec']['data']
            if verbose:
                log.info(
                    '{} - {} redis convert calls to df'.format(
                        label, df_str))
            try:
                calls_df = pd.read_json(calls_json, orient='records')
                if len(calls_df.index) == 0:
                    return ae_consts.SUCCESS, None
                if 'date' not in calls_df:
                    # fix: the original format string had only one
                    # placeholder so the len() argument was silently
                    # dropped from the log output
                    log.debug(
                        'failed to find date column in TD calls '
                        'df={} len={}'.format(
                            calls_df,
                            len(calls_df.index)))
                    return ae_consts.SUCCESS, None
                # fix: sort_values returns a new frame unless
                # inplace=True - the original call discarded the
                # sorted result
                calls_df = calls_df.sort_values(by=['date', 'strike'])
                exp_date_str = (
                    calls_df['exp_date'].iloc[-1])
                calls_df['date'] = calls_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)
            except Exception as f:
                log.error(
                    '{} - {} redis_key={} '
                    'no calls df found or ex={}'.format(
                        label, df_str, redis_key, f))
                return ae_consts.EMPTY, None
            # end of try/ex to convert to df
            if verbose:
                log.info(
                    '{} - {} redis_key={} calls={} exp_date={}'.format(
                        label,
                        df_str,
                        redis_key,
                        len(calls_df.index),
                        exp_date_str))
        else:
            if verbose:
                log.info(
                    '{} - {} did not find valid redis option calls '
                    'in redis_key={} status={}'.format(
                        label,
                        df_str,
                        redis_key,
                        ae_consts.get_status(status=status)))
    except Exception as e:
        log.debug(
            '{} - {} - ds_id={} failed getting option calls from '
            'redis={}:{}@{} key={} ex={}'.format(
                label,
                df_str,
                ds_id,
                redis_host,
                redis_port,
                redis_db,
                redis_key,
                e))
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    if verbose:
        log.info(
            '{} - {} ds_id={} extract scrub={}'.format(
                label, df_str, ds_id, scrub_mode))

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=calls_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
def extract_option_puts_dataset(
        work_dict,
        scrub_mode='sort-by-date'):
    """extract_option_puts_dataset

    Extract the Yahoo options puts for a ticker and
    return it as a ``pandas.Dataframe``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    :return: tuple of ``(status, scrubbed dataframe or None)``
    """
    # '-puts' suffix keeps this extractor's log lines distinct
    label = '{}-puts'.format(work_dict.get('label', 'extract'))
    ds_id = work_dict.get('ticker')
    df_type = DATAFEED_OPTIONS_YAHOO
    df_str = get_datafeed_str_yahoo(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key',
        work_dict.get('options', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key',
        work_dict.get('options', 'missing-s3-key'))
    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', REDIS_DB)

    log.debug(
        '{} - {} - start - redis_key={} s3_key={}'.format(
            label, df_str, redis_key, s3_key))

    # fall back to the configured redis endpoint when the request
    # did not provide a host/port pair
    if not redis_host and not redis_port:
        redis_host = REDIS_ADDRESS.split(':')[0]
        redis_port = REDIS_ADDRESS.split(':')[1]

    exp_date_str = None
    puts_df = None
    status = NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key)
        status = redis_rec['status']
        log.debug(
            '{} - {} redis get data key={} status={}'.format(
                label, df_str, redis_key, get_status(status=status)))

        if status == SUCCESS:
            exp_date_str = redis_rec['rec']['data']['exp_date']
            puts_json = redis_rec['rec']['data']['puts']
            log.debug(
                '{} - {} redis convert puts to df'.format(
                    label, df_str))
            try:
                puts_df = pd.read_json(
                    puts_json,
                    orient='records')
            except Exception:
                # fix: exception was bound to an unused name (F841) -
                # the failure is expected when no puts json was cached
                log.debug(
                    '{} - {} redis_key={} '
                    'no puts df found'.format(
                        label, df_str, redis_key))
                return EMPTY, None
            # end of try/ex to convert to df
            log.debug(
                '{} - {} redis_key={} puts={} exp_date={}'.format(
                    label,
                    df_str,
                    redis_key,
                    len(puts_df.index),
                    exp_date_str))
        else:
            log.debug(
                '{} - {} did not find valid redis option puts '
                'in redis_key={} status={}'.format(
                    label,
                    df_str,
                    redis_key,
                    get_status(status=status)))
    except Exception as e:
        log.debug(
            '{} - {} - ds_id={} failed getting option puts from '
            'redis={}:{}@{} key={} ex={}'.format(
                label,
                df_str,
                ds_id,
                redis_host,
                redis_port,
                redis_db,
                redis_key,
                e))
        return ERR, None
    # end of try/ex extract from redis

    log.debug(
        '{} - {} ds_id={} extract scrub={}'.format(
            label, df_str, ds_id, scrub_mode))

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = SUCCESS

    return status, scrubbed_df
def extract_pricing_dataset(
        work_dict,
        scrub_mode='sort-by-date'):
    """extract_pricing_dataset

    Extract the Yahoo pricing data for a ticker and
    return it as a pandas Dataframe

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    :return: tuple of ``(status, scrubbed dataframe or None)``
    """
    label = work_dict.get('label', 'extract')
    ds_id = work_dict.get('ticker')
    df_type = DATAFEED_PRICING_YAHOO
    df_str = get_datafeed_str_yahoo(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key',
        work_dict.get('pricing', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key',
        work_dict.get('pricing', 'missing-s3-key'))
    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', REDIS_DB)

    log.debug(
        '{} - {} - start - redis_key={} s3_key={}'.format(
            label, df_str, redis_key, s3_key))

    # fall back to the configured redis endpoint when the request
    # did not provide a host/port pair
    if not redis_host and not redis_port:
        redis_host = REDIS_ADDRESS.split(':')[0]
        redis_port = REDIS_ADDRESS.split(':')[1]

    df = None
    status = NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key)
        status = redis_rec['status']
        log.debug(
            '{} - {} redis get data key={} status={}'.format(
                label, df_str, redis_key, get_status(status=status)))

        if status == SUCCESS:
            log.debug(
                '{} - {} redis convert pricing to json'.format(
                    label, df_str))
            cached_dict = redis_rec['rec']['data']
            log.debug(
                '{} - {} redis convert pricing to df'.format(
                    label, df_str))
            try:
                # pricing is cached as a flat dict - build a
                # single-row dataframe from it
                df = pd.DataFrame(
                    cached_dict,
                    index=[0])
            except Exception:
                # fix: exception was bound to an unused name (F841)
                log.debug(
                    '{} - {} redis_key={} '
                    'no pricing df found'.format(
                        label, df_str, redis_key))
                return EMPTY, None
            # end of try/ex to convert to df
            log.debug(
                '{} - {} redis_key={} done convert pricing to df'.format(
                    label, df_str, redis_key))
        else:
            log.debug(
                '{} - {} did not find valid redis pricing '
                'in redis_key={} status={}'.format(
                    label,
                    df_str,
                    redis_key,
                    get_status(status=status)))
    except Exception as e:
        log.debug(
            '{} - {} - ds_id={} failed getting pricing from '
            'redis={}:{}@{} key={} ex={}'.format(
                label,
                df_str,
                ds_id,
                redis_host,
                redis_port,
                redis_db,
                redis_key,
                e))
        return ERR, None
    # end of try/ex extract from redis

    log.debug(
        '{} - {} ds_id={} extract scrub={}'.format(
            label, df_str, ds_id, scrub_mode))

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=df)

    status = SUCCESS

    return status, scrubbed_df
def extract_pricing_dataset(work_dict, scrub_mode='sort-by-date'):
    """extract_pricing_dataset

    Pull the cached Yahoo pricing record for a ticker out of redis
    and hand it back as a pandas Dataframe

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = work_dict.get('label', 'extract')
    ds_id = work_dict.get('ticker')
    df_type = yahoo_consts.DATAFEED_PRICING_YAHOO
    df_str = yahoo_consts.get_datafeed_str_yahoo(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key', work_dict.get('pricing', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key', work_dict.get('pricing', 'missing-s3-key'))
    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)

    log.debug(
        '{} - {} - start - redis_key={} s3_key={}'.format(
            label, df_str, redis_key, s3_key))

    # derive host/port from the configured address when not supplied
    if not redis_host and not redis_port:
        default_endpoint = ae_consts.REDIS_ADDRESS.split(':')
        redis_host = default_endpoint[0]
        redis_port = default_endpoint[1]

    pricing_df = None
    status = ae_consts.NOT_RUN
    try:
        cache_res = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key,
            decompress_df=True)
        status = cache_res['status']
        log.debug(
            '{} - {} redis get data key={} status={}'.format(
                label, df_str, redis_key,
                ae_consts.get_status(status=status)))
        if status == ae_consts.SUCCESS:
            log.debug(
                '{} - {} redis convert pricing to json'.format(
                    label, df_str))
            cached_dict = cache_res['rec']['data']
            log.debug(
                '{} - {} redis convert pricing to df'.format(
                    label, df_str))
            try:
                # pricing record is a flat dict - one-row dataframe
                pricing_df = pd.DataFrame(cached_dict, index=[0])
            except Exception:
                log.debug(
                    '{} - {} redis_key={} no pricing df found'.format(
                        label, df_str, redis_key))
                return ae_consts.EMPTY, None
            # end of try/ex to convert to df
            log.debug(
                '{} - {} redis_key={} done convert pricing to df'.format(
                    label, df_str, redis_key))
        else:
            log.debug(
                '{} - {} did not find valid redis pricing '
                'in redis_key={} status={}'.format(
                    label, df_str, redis_key,
                    ae_consts.get_status(status=status)))
    except Exception as e:
        log.debug(
            '{} - {} - ds_id={} failed getting pricing from '
            'redis={}:{}@{} key={} ex={}'.format(
                label, df_str, ds_id, redis_host, redis_port,
                redis_db, redis_key, e))
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    log.debug(
        '{} - {} ds_id={} extract scrub={}'.format(
            label, df_str, ds_id, scrub_mode))

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=pricing_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
def extract_option_puts_dataset(work_dict, scrub_mode='sort-by-date'):
    """extract_option_puts_dataset

    Extract the Yahoo options puts for a ticker and
    return it as a ``pandas.Dataframe``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    :return: tuple of ``(status, scrubbed dataframe or None)``
    """
    # '-puts' suffix keeps this extractor's log lines distinct
    label = f'{work_dict.get("label", "extract")}-puts'
    ds_id = work_dict.get('ticker')
    df_type = yahoo_consts.DATAFEED_OPTIONS_YAHOO
    df_str = yahoo_consts.get_datafeed_str_yahoo(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key', work_dict.get('puts', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key', work_dict.get('puts', 'missing-s3-key'))
    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)

    log.debug(
        f'{label} - {df_str} - start - redis_key={redis_key} s3_key={s3_key}')

    # fall back to the configured redis endpoint when the request
    # did not provide a host/port pair
    if not redis_host and not redis_port:
        redis_host = ae_consts.REDIS_ADDRESS.split(':')[0]
        redis_port = ae_consts.REDIS_ADDRESS.split(':')[1]

    exp_date_str = None
    puts_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key,
            decompress_df=True)
        status = redis_rec['status']
        log.debug(f'{label} - {df_str} redis get data key={redis_key} '
                  f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            # records may nest the puts under a 'puts' key or store
            # the dataframe json directly
            puts_json = None
            if 'puts' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['puts']
            else:
                puts_json = redis_rec['rec']['data']
            log.debug(f'{label} - {df_str} redis convert puts to df')
            try:
                puts_df = pd.read_json(puts_json, orient='records')
                # last row's expiration epoch drives the reported
                # expiration date string (UTC)
                exp_epoch_value = puts_df['expiration'].iloc[-1]
                exp_date_str = ae_utils.convert_epoch_to_datetime_string(
                    epoch=exp_epoch_value,
                    fmt=ae_consts.COMMON_DATE_FORMAT,
                    use_utc=True)
            except Exception:
                # expected when the cached json is missing or malformed
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no puts df found')
                return ae_consts.EMPTY, None
            # end of try/ex to convert to df
            log.debug(f'{label} - {df_str} redis_key={redis_key} '
                      f'puts={len(puts_df.index)} exp_date={exp_date_str}')
        else:
            log.debug(
                f'{label} - {df_str} did not find valid redis option puts '
                f'in redis_key={redis_key} '
                f'status={ae_consts.get_status(status=status)}')
    except Exception as e:
        log.debug(
            f'{label} - {df_str} - ds_id={ds_id} failed getting option puts '
            f'from redis={redis_host}:{redis_port}@{redis_db} '
            f'key={redis_key} ex={e}')
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    log.debug(f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
def perform_extract(
        df_type,
        df_str,
        work_dict,
        dataset_id_key='ticker',
        scrub_mode='sort-by-date',
        verbose=False):
    """perform_extract

    Helper for extracting from Redis or S3

    :param df_type: datafeed type enum
    :param df_str: dataset string name
    :param work_dict: incoming work request dictionary
    :param dataset_id_key: configurable dataset identifier key for
        tracking scrubbing and debugging errors
    :param scrub_mode: scrubbing mode on extraction for one-off cleanup
        before analysis
    :param verbose: optional - boolean for turning on logging
    :return: tuple of ``(status, scrubbed dataframe or None)``
    """
    # assume failure until both the extract and the scrub succeed
    status = ae_consts.FAILED
    ds_id = work_dict.get(dataset_id_key, None)
    label = work_dict.get('label', 'extract')
    # connectivity settings fall back to module-level defaults when
    # not provided in the work request
    s3_bucket = work_dict.get('s3_bucket', ae_consts.S3_BUCKET)
    s3_key = work_dict.get('s3_key', ae_consts.S3_KEY)
    redis_key = work_dict.get('redis_key', ae_consts.REDIS_KEY)
    s3_enabled = work_dict.get('s3_enabled', ae_consts.ENABLED_S3_UPLOAD)
    s3_access_key = work_dict.get('s3_access_key', ae_consts.S3_ACCESS_KEY)
    s3_secret_key = work_dict.get('s3_secret_key', ae_consts.S3_SECRET_KEY)
    s3_region_name = work_dict.get('s3_region_name', ae_consts.S3_REGION_NAME)
    s3_address = work_dict.get('s3_address', ae_consts.S3_ADDRESS)
    s3_secure = work_dict.get('s3_secure', ae_consts.S3_SECURE)
    redis_address = work_dict.get('redis_address', ae_consts.REDIS_ADDRESS)
    redis_password = work_dict.get('redis_password', ae_consts.REDIS_PASSWORD)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)
    redis_expire = work_dict.get('redis_expire', ae_consts.REDIS_EXPIRE)

    if verbose:
        log.info(
            f'{label} - {df_str} - START - '
            f'ds_id={ds_id} scrub_mode={scrub_mode} '
            f'redis_address={redis_address}@{redis_db} redis_key={redis_key} '
            f's3={s3_enabled} s3_address={s3_address} s3_bucket={s3_bucket} '
            f's3_key={s3_key}')

    # opt-in logging - these lines expose credentials, so they stay
    # off unless verbose or the env flags explicitly enable them
    if verbose or ae_consts.ev('DEBUG_REDIS_EXTRACT', '0') == '1':
        log.info(f'{label} - {df_str} - ds_id={ds_id} redis '
                 f'pw={redis_password} expire={redis_expire}')
    if verbose or ae_consts.ev('DEBUG_S3_EXTRACT', '0') == '1':
        log.info(f'{label} - {df_str} - ds_id={ds_id} s3 '
                 f'ak={s3_access_key} sk={s3_secret_key} '
                 f'region={s3_region_name} secure={s3_secure}')

    extract_res = None
    try:
        extract_res = build_df.build_df_from_redis(
            label=label,
            address=redis_address,
            db=redis_db,
            key=redis_key,
            verbose=verbose)
    except Exception as e:
        extract_res = None
        log.error(f'{label} - {df_str} - ds_id={ds_id} failed extract from '
                  f'redis={redis_address}@{redis_db} key={redis_key} ex={e}')
    # end of try/ex extract from redis

    if not extract_res:
        return status, None

    # the rebuilt record must both report SUCCESS and flag the
    # dataframe as valid before it is worth scrubbing
    valid_df = (
        extract_res['status'] == ae_consts.SUCCESS
        and extract_res['rec']['valid_df'])

    if not valid_df:
        if verbose or ae_consts.ev('DEBUG_S3_EXTRACT', '0') == '1':
            log.error(
                f'{label} - {df_str} ds_id={ds_id} invalid df '
                f'status={ae_consts.get_status(status=extract_res["status"])} '
                f'extract_res={extract_res}')
        return status, None

    extract_df = extract_res['rec']['data']

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    # one-off cleanup (e.g. sort-by-date) before handing back for analysis
    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=extract_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
def extract_option_calls_dataset(
        ticker=None,
        date=None,
        work_dict=None,
        scrub_mode='sort-by-date',
        verbose=False):
    """extract_option_calls_dataset

    Extract the TD options calls for a ticker and
    return a tuple (status, ``pandas.Dataframe``)

    .. code-block:: python

        import analysis_engine.td.extract_df_from_redis as td_extract

        # extract by historical date is also supported as an arg
        # date='2019-02-15'
        calls_status, calls_df = td_extract.extract_option_calls_dataset(
            ticker='SPY')
        print(calls_df)

    :param ticker: string ticker to extract
    :param date: optional - string date to extract
        formatted ``YYYY-MM-DD``
    :param work_dict: dictionary of args
    :param scrub_mode: optional - string type of scrubbing handler to run
    :param verbose: optional - boolean for turning on logging
    """
    label = 'extract_td_calls'
    latest_close_date = ae_utils.get_last_close_str()
    use_date = date
    # values in work_dict take precedence over the direct args
    if work_dict:
        if not ticker:
            ticker = work_dict.get('ticker', None)
        label = f'{work_dict.get("label", label)}'
    # default to the most recent market close date
    if not use_date:
        use_date = latest_close_date
    ds_id = ticker
    df_type = td_consts.DATAFEED_TD_CALLS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_db = ae_consts.REDIS_DB
    # cache key convention: <TICKER>_<YYYY-MM-DD>_tdcalls
    redis_key = f'{ticker}_{use_date}_tdcalls'
    redis_host, redis_port = ae_consts.get_redis_host_and_port(req=work_dict)
    redis_password = ae_consts.REDIS_PASSWORD
    s3_key = redis_key

    if work_dict:
        redis_db = work_dict.get('redis_db', redis_db)
        redis_password = work_dict.get('redis_password', redis_password)
        verbose = work_dict.get('verbose_td', verbose)

    if verbose:
        log.info(f'{label} - {df_str} - start - redis_key={redis_key} '
                 f's3_key={s3_key}')

    exp_date_str = None
    calls_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=redis_password,
            key=redis_key,
            decompress_df=True)
        status = redis_rec['status']
        if verbose:
            log.info(f'{label} - {df_str} redis get data key={redis_key} '
                     f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            # newest records use 'tdcalls', older ones 'calls', and
            # the oldest store the dataframe json directly
            calls_json = None
            if 'tdcalls' in redis_rec['rec']['data']:
                calls_json = redis_rec['rec']['data']['tdcalls']
            elif 'calls' in redis_rec['rec']['data']:
                calls_json = redis_rec['rec']['data']['calls']
            else:
                calls_json = redis_rec['rec']['data']
            if not calls_json:
                return ae_consts.SUCCESS, pd.DataFrame([])
            if verbose:
                log.info(f'{label} - {df_str} redis convert calls to df')
            exp_date_str = None
            try:
                calls_df = pd.read_json(calls_json, orient='records')
                if len(calls_df.index) == 0:
                    return ae_consts.SUCCESS, pd.DataFrame([])
                if 'date' not in calls_df:
                    if verbose:
                        log.error(
                            'failed to find date column in TD calls '
                            f'df={calls_df} from lens={len(calls_df.index)}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                # NOTE(review): sort_values is not in-place here, so the
                # sorted result is discarded - confirm whether the sort
                # was meant to be assigned back
                calls_df.sort_values(by=['date', 'strike'])
                """
                for i, r in calls_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in calls_df:
                        calls_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            calls_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = (calls_df['exp_date'].iloc[-1])
                calls_df['date'] = calls_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)
            except Exception as f:
                # legacy records cached the 'date' column as raw epoch
                # values which breaks the .dt accessor above - attempt
                # to repair those records in place
                not_fixed = True
                if ('Can only use .dt accessor with '
                        'datetimelike values') in str(f):
                    try:
                        log.critical(f'fixing dates in {redis_key}')
                        # remove epoch second data and
                        # use only the millisecond date values
                        bad_date = ae_consts.EPOCH_MINIMUM_DATE
                        calls_df['date'][calls_df['date'] < bad_date] = None
                        calls_df = calls_df.dropna(axis=0, how='any')
                        fmt = ae_consts.COMMON_TICK_DATE_FORMAT
                        calls_df['date'] = pd.to_datetime(
                            calls_df['date'],
                            unit='ms').dt.strftime(fmt)
                        not_fixed = False
                    except Exception as g:
                        log.critical(
                            f'failed to parse date column {calls_df["date"]} '
                            f'with dt.strftime ex={f} and EPOCH EX={g}')
                        return ae_consts.SUCCESS, pd.DataFrame([])
                # if able to fix error or not
                if not_fixed:
                    log.debug(f'{label} - {df_str} redis_key={redis_key} '
                              f'no calls df found or ex={f}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                # if unable to fix - return out
                # NOTE(review): this path also runs when the date fix
                # above SUCCEEDED (not_fixed == False), so the repaired
                # dataframe is discarded and an empty one is returned -
                # confirm whether execution should continue instead
                log.error(f'{label} - {df_str} redis_key={redis_key} '
                          f'no calls df found or ex={f}')
                return ae_consts.SUCCESS, pd.DataFrame([])
            # end of try/ex to convert to df
            if verbose:
                log.info(
                    f'{label} - {df_str} redis_key={redis_key} '
                    f'calls={len(calls_df.index)} exp_date={exp_date_str}')
        else:
            if verbose:
                log.info(f'{label} - {df_str} did not find valid redis '
                         f'option calls in redis_key={redis_key} '
                         f'status={ae_consts.get_status(status=status)}')
    except Exception as e:
        if verbose:
            log.error(
                f'{label} - {df_str} - ds_id={ds_id} failed getting option '
                f'calls from redis={redis_host}:{redis_port}@{redis_db} '
                f'key={redis_key} ex={e}')
        return ae_consts.ERR, pd.DataFrame([])
    # end of try/ex extract from redis

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=calls_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
def extract_option_puts_dataset(
        ticker=None,
        date=None,
        work_dict=None,
        scrub_mode='sort-by-date',
        verbose=False):
    """extract_option_puts_dataset

    Extract the TD options puts for a ticker and
    return a tuple (status, ``pandas.Dataframe``)

    .. code-block:: python

        import analysis_engine.td.extract_df_from_redis as td_extract

        # extract by historical date is also supported as an arg
        # date='2019-02-15'
        puts_status, puts_df = td_extract.extract_option_puts_dataset(
            ticker='SPY')
        print(puts_df)

    :param ticker: string ticker to extract
    :param date: optional - string date to extract
        formatted ``YYYY-MM-DD``
    :param work_dict: dictionary of args
    :param scrub_mode: optional - string type of scrubbing handler to run
    :param verbose: optional - boolean for turning on logging
    """
    label = 'extract_td_puts'
    latest_close_date = ae_utils.get_last_close_str()
    use_date = date
    # values in work_dict take precedence over the direct args
    if work_dict:
        if not ticker:
            ticker = work_dict.get('ticker', None)
        label = f'{work_dict.get("label", label)}'
    # default to the most recent market close date
    if not use_date:
        use_date = latest_close_date
    ds_id = ticker
    df_type = td_consts.DATAFEED_TD_PUTS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_db = ae_consts.REDIS_DB
    # cache key convention: <TICKER>_<YYYY-MM-DD>_tdputs
    redis_key = f'{ticker}_{use_date}_tdputs'
    redis_host, redis_port = ae_consts.get_redis_host_and_port(req=work_dict)
    redis_password = ae_consts.REDIS_PASSWORD
    s3_key = redis_key

    if work_dict:
        redis_db = work_dict.get('redis_db', redis_db)
        redis_password = work_dict.get('redis_password', redis_password)
        verbose = work_dict.get('verbose_td', verbose)

    if verbose:
        log.info(f'{label} - {df_str} - start - redis_key={redis_key} '
                 f's3_key={s3_key}')

    exp_date_str = None
    puts_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=redis_password,
            key=redis_key,
            decompress_df=True)
        status = redis_rec['status']
        if verbose:
            log.info(f'{label} - {df_str} redis get data key={redis_key} '
                     f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            # newest records use 'tdputs', older ones 'puts', and the
            # oldest store the dataframe json directly
            puts_json = None
            if 'tdputs' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['tdputs']
            # fix: this branch was a second 'if' instead of 'elif', so
            # a record holding only 'tdputs' fell through to the else
            # and the whole dict clobbered the json extracted above
            # (the calls extractor already uses elif here)
            elif 'puts' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['puts']
            else:
                puts_json = redis_rec['rec']['data']
            if not puts_json:
                return ae_consts.SUCCESS, pd.DataFrame([])
            if verbose:
                log.info(f'{label} - {df_str} redis convert puts to df')
            try:
                puts_df = pd.read_json(puts_json, orient='records')
                if len(puts_df.index) == 0:
                    return ae_consts.SUCCESS, pd.DataFrame([])
                if 'date' not in puts_df:
                    log.debug('failed to find date column in TD puts '
                              f'df={puts_df} len={len(puts_df.index)}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                # fix: sort_values returns a new frame unless
                # inplace=True - the original call discarded the
                # sorted result
                puts_df = puts_df.sort_values(by=['date', 'strike'])
                exp_date_str = (puts_df['exp_date'].iloc[-1])
                puts_df['date'] = puts_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)
            except Exception:
                # expected when the cached json is missing or malformed
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no puts df found')
                return ae_consts.SUCCESS, pd.DataFrame([])
            # end of try/ex to convert to df
            if verbose:
                log.info(f'{label} - {df_str} redis_key={redis_key} '
                         f'puts={len(puts_df.index)} exp_date={exp_date_str}')
        else:
            if verbose:
                log.info(f'{label} - {df_str} did not find valid redis '
                         f'option puts in redis_key={redis_key} '
                         f'status={ae_consts.get_status(status=status)}')
    except Exception as e:
        if verbose:
            log.error(
                f'{label} - {df_str} - ds_id={ds_id} failed getting option '
                f'puts from redis={redis_host}:{redis_port}@{redis_db} '
                f'key={redis_key} ex={e}')
        return ae_consts.ERR, pd.DataFrame([])
    # end of try/ex extract from redis

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df