示例#1
0
def debug_msg(label, datafeed_type, msg_format, date_str, df):
    """debug_msg

    Debug helper for debugging scrubbing handlers

    Logs one summary line per call. When the ``DEBUG_FETCH`` value
    returned by ``ae_consts.ev`` is ``'1'`` the line also includes
    ``df`` (plus its columns when ``df`` has an ``empty`` attribute),
    and a separator line is logged before a message containing
    ``START`` and after a message containing ``END``.

    :param label: log label
    :param datafeed_type: fetch type
    :param msg_format: message to include - a ``str.format``
        template that is filled with two positional values
        (the ``df`` or a ``'_'`` placeholder, then ``date_str``)
    :param date_str: date string
    :param df: ``pandas DataFrame`` or ``None``
    """

    # summary text rendered with a placeholder instead of the df
    msg = msg_format.format('_', date_str)

    if datafeed_type in (
            yahoo_consts.DATAFEED_PRICING_YAHOO,
            yahoo_consts.DATAFEED_OPTIONS_YAHOO,
            yahoo_consts.DATAFEED_NEWS_YAHOO):
        dft_msg = yahoo_consts.get_datafeed_str_yahoo(df_type=datafeed_type)
    elif datafeed_type in (
            td_consts.DATAFEED_TD_CALLS,
            td_consts.DATAFEED_TD_PUTS):
        dft_msg = td_consts.get_datafeed_str_td(df_type=datafeed_type)
    else:
        dft_msg = iex_consts.get_datafeed_str(df_type=datafeed_type)

    if ae_consts.ev('DEBUG_FETCH', '0') != '1':
        log.info('{} - {} - {}'.format(label, dft_msg, msg))
        return

    separator = (
        '{} - {} -------------------------'
        '------------------------------------'.format(
            label, dft_msg))

    if 'START' in msg:
        log.info(separator)

    # no trailing comma here: a trailing comma would turn the message
    # into a 1-tuple, which logs as "('...',)" and breaks substring
    # checks against it
    detail_msg = msg_format.format(df, date_str)
    if hasattr(df, 'empty'):
        log.info('{} - {} - {} found df={} '
                 'columns={}'.format(label, dft_msg, detail_msg, df,
                                     df.columns.values))
    else:
        log.info('{} - {} - {} not df={}'.format(
            label, dft_msg, detail_msg, df))

    # the marker is looked up in the placeholder message so that text
    # inside the rendered df cannot trigger the separator by accident
    if 'END' in msg:
        log.info(separator)
def extract_pricing_dataset(
        work_dict,
        scrub_mode='sort-by-date'):
    """extract_pricing_dataset

    Read the cached Yahoo pricing record for a ticker from redis,
    load it into a single-row ``pandas.DataFrame`` and pass it
    through the scrubbing handler.

    :param work_dict: dictionary of args - the keys read here are
        ``label``, ``ticker``, ``redis_key``, ``s3_key``, ``pricing``,
        ``redis_host``, ``redis_port``, ``redis_db`` and ``password``
    :param scrub_mode: type of scrubbing handler to run
    :return: tuple ``(status, df)`` - ``(EMPTY, None)`` when the
        cached record cannot be converted to a DataFrame,
        ``(ERR, None)`` when the redis lookup raises, otherwise
        ``(SUCCESS, scrubbed_df)`` (also when redis did not report
        ``SUCCESS``, in which case ``None`` is what gets scrubbed)
    """
    log_label = work_dict.get('label', 'extract')
    ticker = work_dict.get('ticker')
    feed_type = DATAFEED_PRICING_YAHOO
    feed_name = get_datafeed_str_yahoo(df_type=feed_type)

    # both keys fall back to the 'pricing' entry before a placeholder
    default_cache_key = work_dict.get('pricing', 'missing-redis-key')
    cache_key = work_dict.get('redis_key', default_cache_key)
    default_s3_key = work_dict.get('pricing', 'missing-s3-key')
    s3_location = work_dict.get('s3_key', default_s3_key)

    host = work_dict.get('redis_host', None)
    port = work_dict.get('redis_port', None)
    db_num = work_dict.get('redis_db', REDIS_DB)

    log.debug('{} - {} - start - redis_key={} s3_key={}'.format(
        log_label, feed_name, cache_key, s3_location))

    # use the configured address only when neither value was supplied
    if not (host or port):
        address_parts = REDIS_ADDRESS.split(':')
        host = address_parts[0]
        port = address_parts[1]

    pricing_df = None
    status = NOT_RUN
    try:
        cache_rec = redis_get.get_data_from_redis_key(
            label=log_label,
            host=host,
            port=port,
            db=db_num,
            password=work_dict.get('password', None),
            key=cache_key)

        status = cache_rec['status']
        log.debug('{} - {} redis get data key={} status={}'.format(
            log_label, feed_name, cache_key, get_status(status=status)))

        if status != SUCCESS:
            log.debug(
                '{} - {} did not find valid redis pricing '
                'in redis_key={} status={}'.format(
                    log_label,
                    feed_name,
                    cache_key,
                    get_status(status=status)))
        else:
            log.debug('{} - {} redis convert pricing to json'.format(
                log_label, feed_name))
            cached_pricing = cache_rec['rec']['data']
            log.debug('{} - {} redis convert pricing to df'.format(
                log_label, feed_name))
            try:
                pricing_df = pd.DataFrame(cached_pricing, index=[0])
            except Exception:
                # any conversion failure is reported as an empty dataset
                log.debug(
                    '{} - {} redis_key={} '
                    'no pricing df found'.format(
                        log_label, feed_name, cache_key))
                return EMPTY, None
            else:
                log.debug(
                    '{} - {} redis_key={} done convert pricing to df'.format(
                        log_label, feed_name, cache_key))
    except Exception as e:
        log.debug(
            '{} - {} - ds_id={} failed getting pricing from '
            'redis={}:{}@{} key={} ex={}'.format(
                log_label, feed_name, ticker,
                host, port, db_num,
                cache_key, e))
        return ERR, None
    # end of try/ex extract from redis

    log.debug('{} - {} ds_id={} extract scrub={}'.format(
        log_label, feed_name, ticker, scrub_mode))

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=log_label,
        scrub_mode=scrub_mode,
        datafeed_type=feed_type,
        msg_format='df={} date_str={}',
        ds_id=ticker,
        df=pricing_df)

    return SUCCESS, scrubbed_df
示例#3
0
def extract_pricing_dataset(work_dict, scrub_mode='sort-by-date'):
    """extract_pricing_dataset

    Read the cached Yahoo pricing record for a ticker from redis,
    load it into a single-row ``pandas.DataFrame`` and pass it
    through the scrubbing handler.

    :param work_dict: dictionary of args - the keys read here are
        ``label``, ``ticker``, ``redis_key``, ``s3_key``, ``pricing``,
        ``redis_host``, ``redis_port``, ``redis_db`` and ``password``
    :param scrub_mode: type of scrubbing handler to run
    :return: tuple ``(status, df)`` - ``(ae_consts.EMPTY, None)``
        when the cached record cannot be converted to a DataFrame,
        ``(ae_consts.ERR, None)`` when the redis lookup raises,
        otherwise ``(ae_consts.SUCCESS, scrubbed_df)`` (also when
        redis did not report ``SUCCESS``, in which case ``None`` is
        what gets scrubbed)
    """
    label = work_dict.get('label', 'extract')
    ds_id = work_dict.get('ticker')
    df_type = yahoo_consts.DATAFEED_PRICING_YAHOO
    df_str = yahoo_consts.get_datafeed_str_yahoo(df_type=df_type)

    # both keys fall back to the 'pricing' entry before a placeholder
    pricing_redis_default = work_dict.get('pricing', 'missing-redis-key')
    redis_key = work_dict.get('redis_key', pricing_redis_default)
    pricing_s3_default = work_dict.get('pricing', 'missing-s3-key')
    s3_key = work_dict.get('s3_key', pricing_s3_default)

    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)

    start_msg = (
        f'{label} - {df_str} - start - redis_key={redis_key} s3_key={s3_key}')
    log.debug(start_msg)

    # use the configured address only when neither value was supplied
    if not (redis_host or redis_port):
        address_parts = ae_consts.REDIS_ADDRESS.split(':')
        redis_host = address_parts[0]
        redis_port = address_parts[1]

    df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key,
            decompress_df=True)

        status = redis_rec['status']
        log.debug(f'{label} - {df_str} redis get data key={redis_key} '
                  f'status={ae_consts.get_status(status=status)}')

        if status != ae_consts.SUCCESS:
            log.debug(f'{label} - {df_str} did not find valid redis pricing '
                      f'in redis_key={redis_key} '
                      f'status={ae_consts.get_status(status=status)}')
        else:
            log.debug(f'{label} - {df_str} redis convert pricing to json')
            cached_dict = redis_rec['rec']['data']
            log.debug(f'{label} - {df_str} redis convert pricing to df')
            try:
                df = pd.DataFrame(cached_dict, index=[0])
            except Exception:
                # any conversion failure is reported as an empty dataset
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no pricing df found')
                return ae_consts.EMPTY, None
            else:
                log.debug(f'{label} - {df_str} redis_key={redis_key} done '
                          'convert pricing to df')
    except Exception as e:
        failure_msg = (
            f'{label} - {df_str} - ds_id={ds_id} failed getting pricing from '
            f'redis={redis_host}:{redis_port}@{redis_db} '
            f'key={redis_key} ex={e}')
        log.debug(failure_msg)
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    log.debug(f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=df)

    return ae_consts.SUCCESS, scrubbed_df
def extract_option_puts_dataset(
        work_dict,
        scrub_mode='sort-by-date'):
    """extract_option_puts_dataset

    Read the cached Yahoo option puts for a ticker from redis,
    parse them into a ``pandas.DataFrame`` and pass the result
    through the scrubbing handler.

    :param work_dict: dictionary of args - the keys read here are
        ``label``, ``ticker``, ``redis_key``, ``s3_key``, ``options``,
        ``redis_host``, ``redis_port``, ``redis_db`` and ``password``
    :param scrub_mode: type of scrubbing handler to run
    :return: tuple ``(status, df)`` - ``(EMPTY, None)`` when the
        cached puts cannot be parsed into a DataFrame,
        ``(ERR, None)`` when the redis lookup raises, otherwise
        ``(SUCCESS, scrubbed_df)`` (also when redis did not report
        ``SUCCESS``, in which case ``None`` is what gets scrubbed)
    """
    log_label = '{}-puts'.format(work_dict.get('label', 'extract'))
    ticker = work_dict.get('ticker')
    feed_type = DATAFEED_OPTIONS_YAHOO
    feed_name = get_datafeed_str_yahoo(df_type=feed_type)

    # both keys fall back to the 'options' entry before a placeholder
    default_cache_key = work_dict.get('options', 'missing-redis-key')
    cache_key = work_dict.get('redis_key', default_cache_key)
    default_s3_key = work_dict.get('options', 'missing-s3-key')
    s3_location = work_dict.get('s3_key', default_s3_key)

    host = work_dict.get('redis_host', None)
    port = work_dict.get('redis_port', None)
    db_num = work_dict.get('redis_db', REDIS_DB)

    log.debug('{} - {} - start - redis_key={} s3_key={}'.format(
        log_label, feed_name, cache_key, s3_location))

    # use the configured address only when neither value was supplied
    if not (host or port):
        address_parts = REDIS_ADDRESS.split(':')
        host = address_parts[0]
        port = address_parts[1]

    expiration = None
    options_df = None
    status = NOT_RUN
    try:
        cache_rec = redis_get.get_data_from_redis_key(
            label=log_label,
            host=host,
            port=port,
            db=db_num,
            password=work_dict.get('password', None),
            key=cache_key)

        status = cache_rec['status']
        log.debug('{} - {} redis get data key={} status={}'.format(
            log_label, feed_name, cache_key, get_status(status=status)))

        if status != SUCCESS:
            log.debug(
                '{} - {} did not find valid redis option puts '
                'in redis_key={} status={}'.format(
                    log_label,
                    feed_name,
                    cache_key,
                    get_status(status=status)))
        else:
            cached_data = cache_rec['rec']['data']
            expiration = cached_data['exp_date']
            puts_payload = cached_data['puts']
            log.debug('{} - {} redis convert puts to df'.format(
                log_label, feed_name))
            try:
                options_df = pd.read_json(puts_payload, orient='records')
            except Exception:
                # any parsing failure is reported as an empty dataset
                log.debug(
                    '{} - {} redis_key={} '
                    'no puts df found'.format(
                        log_label, feed_name, cache_key))
                return EMPTY, None
            else:
                log.debug(
                    '{} - {} redis_key={} puts={} exp_date={}'.format(
                        log_label,
                        feed_name,
                        cache_key,
                        len(options_df.index),
                        expiration))
    except Exception as e:
        log.debug(
            '{} - {} - ds_id={} failed getting option puts from '
            'redis={}:{}@{} key={} ex={}'.format(
                log_label, feed_name, ticker,
                host, port, db_num,
                cache_key, e))
        return ERR, None
    # end of try/ex extract from redis

    log.debug('{} - {} ds_id={} extract scrub={}'.format(
        log_label, feed_name, ticker, scrub_mode))

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=log_label,
        scrub_mode=scrub_mode,
        datafeed_type=feed_type,
        msg_format='df={} date_str={}',
        ds_id=ticker,
        df=options_df)

    return SUCCESS, scrubbed_df
示例#5
0
def extract_option_puts_dataset(work_dict, scrub_mode='sort-by-date'):
    """extract_option_puts_dataset

    Read the cached Yahoo option puts for a ticker from redis,
    parse them into a ``pandas.DataFrame`` and pass the result
    through the scrubbing handler.

    :param work_dict: dictionary of args - the keys read here are
        ``label``, ``ticker``, ``redis_key``, ``s3_key``, ``puts``,
        ``redis_host``, ``redis_port``, ``redis_db`` and ``password``
    :param scrub_mode: type of scrubbing handler to run
    :return: tuple ``(status, df)`` - ``(ae_consts.EMPTY, None)``
        when the cached puts cannot be parsed or the expiration
        cannot be derived, ``(ae_consts.ERR, None)`` when the redis
        lookup raises, otherwise ``(ae_consts.SUCCESS, scrubbed_df)``
        (also when redis did not report ``SUCCESS``, in which case
        ``None`` is what gets scrubbed)
    """
    label = f'{work_dict.get("label", "extract")}-puts'
    ds_id = work_dict.get('ticker')
    df_type = yahoo_consts.DATAFEED_OPTIONS_YAHOO
    df_str = yahoo_consts.get_datafeed_str_yahoo(df_type=df_type)

    # both keys fall back to the 'puts' entry before a placeholder
    puts_redis_default = work_dict.get('puts', 'missing-redis-key')
    redis_key = work_dict.get('redis_key', puts_redis_default)
    puts_s3_default = work_dict.get('puts', 'missing-s3-key')
    s3_key = work_dict.get('s3_key', puts_s3_default)

    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)

    start_msg = (
        f'{label} - {df_str} - start - redis_key={redis_key} s3_key={s3_key}')
    log.debug(start_msg)

    # use the configured address only when neither value was supplied
    if not (redis_host or redis_port):
        address_parts = ae_consts.REDIS_ADDRESS.split(':')
        redis_host = address_parts[0]
        redis_port = address_parts[1]

    exp_date_str = None
    puts_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key,
            decompress_df=True)

        status = redis_rec['status']
        log.debug(f'{label} - {df_str} redis get data key={redis_key} '
                  f'status={ae_consts.get_status(status=status)}')

        if status != ae_consts.SUCCESS:
            log.debug(
                f'{label} - {df_str} did not find valid redis option puts '
                f'in redis_key={redis_key} '
                f'status={ae_consts.get_status(status=status)}')
        else:
            # the puts are either nested under a 'puts' key or are the
            # cached data itself
            cached_data = redis_rec['rec']['data']
            puts_json = (
                cached_data['puts'] if 'puts' in cached_data
                else cached_data)
            log.debug(f'{label} - {df_str} redis convert puts to df')
            try:
                puts_df = pd.read_json(puts_json, orient='records')
                # expiration of the last row, rendered for the log line
                exp_epoch_value = puts_df['expiration'].iloc[-1]
                exp_date_str = ae_utils.convert_epoch_to_datetime_string(
                    epoch=exp_epoch_value,
                    fmt=ae_consts.COMMON_DATE_FORMAT,
                    use_utc=True)
            except Exception:
                # any parsing failure is reported as an empty dataset
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no puts df found')
                return ae_consts.EMPTY, None
            else:
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          f'puts={len(puts_df.index)} exp_date={exp_date_str}')
    except Exception as e:
        failure_msg = (
            f'{label} - {df_str} - ds_id={ds_id} failed getting option puts '
            f'from redis={redis_host}:{redis_port}@{redis_db} '
            f'key={redis_key} ex={e}')
        log.debug(failure_msg)
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    log.debug(f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    return ae_consts.SUCCESS, scrubbed_df