Пример #1
0
def update_corporate_fk():
    """Back-fill ``Corporate.entity_id`` for corporates that have none.

    Two passes are run. Each selects corporates with a NULL ``entity_id``
    paired with a matching entity, then issues one UPDATE followed by one
    commit per matched row:

    1. first six characters of ``Corporate.cusip9`` equal ``Entity.cusip6``
    2. ``Corporate.company_symbol`` equals ``Entity.ticker``
    """
    def _assign(matches):
        # the first two columns of every row are (corporate id, entity id)
        for match in matches:
            corporate_id, entity_id = match[0], match[1]
            target = db.query(Corporate).filter(Corporate.id == corporate_id)
            target.update({Corporate.entity_id: entity_id},
                          synchronize_session=False)
            db.commit()

    # pass 1: CUSIP-9 prefix against entity CUSIP-6
    cusip_stmt = select([
        Corporate.id,
        Entity.id,
        Corporate.entity_id,
        Corporate.cusip9,
        Entity.cusip6,
    ])
    cusip_stmt = cusip_stmt.where(Corporate.entity_id.is_(None))
    cusip_stmt = cusip_stmt.where(
        func.left(Corporate.cusip9, 6) == Entity.cusip6)
    _assign(db.execute(cusip_stmt).fetchall())

    # pass 2: company symbol against entity ticker; runs after pass 1 has
    # committed, so corporates linked above no longer qualify
    ticker_stmt = select([
        Corporate.id,
        Entity.id,
        Corporate.entity_id,
        Corporate.company_symbol,
        Entity.ticker,
    ])
    ticker_stmt = ticker_stmt.where(Corporate.entity_id.is_(None))
    ticker_stmt = ticker_stmt.where(
        Corporate.company_symbol == Entity.ticker)
    _assign(db.execute(ticker_stmt).fetchall())
Пример #2
0
def update_equity_px_fk():
    """Set ``equity_px.entity_id`` from the entity sharing the same ticker.

    Only equity_px rows with a NULL ``entity_id`` are updated. The change is
    issued as a single UPDATE statement and then committed.
    """
    tables = Base.metadata.tables
    px_tbl = tables['equity_px']
    entity_tbl = tables['entity']

    stmt = update(px_tbl)
    stmt = stmt.where(px_tbl.columns.entity_id.is_(None))
    stmt = stmt.where(px_tbl.columns.ticker == entity_tbl.columns.ticker)
    stmt = stmt.values(entity_id=entity_tbl.columns.id)

    db.execute(stmt)
    db.commit()
Пример #3
0
def update_financial_fk():
    """Set ``financial.entity_id`` from the entity sharing the same ticker.

    Only financial rows with a NULL ``entity_id`` are updated. The change is
    issued as a single UPDATE statement and then committed.
    """
    tables = Base.metadata.tables
    fin_tbl = tables['financial']
    entity_tbl = tables['entity']

    stmt = update(fin_tbl)
    stmt = stmt.where(fin_tbl.columns.entity_id.is_(None))
    stmt = stmt.where(fin_tbl.columns.ticker == entity_tbl.columns.ticker)
    stmt = stmt.values(entity_id=entity_tbl.columns.id)

    db.execute(stmt)
    db.commit()
Пример #4
0
def update_corp_tx_fk():
    """Set ``corp_tx.corporate_id`` from the corporate with the same CUSIP.

    A corp_tx row is updated when its ``corporate_id`` is NULL and its
    ``cusip_id`` equals a corporate's ``cusip9``. The change is issued as a
    single UPDATE statement and then committed.
    """
    tables = Base.metadata.tables
    tx_tbl = tables['corp_tx']
    corp_tbl = tables['corporate']

    stmt = update(tx_tbl)
    stmt = stmt.where(tx_tbl.columns.corporate_id.is_(None))
    stmt = stmt.where(tx_tbl.columns.cusip_id == corp_tbl.columns.cusip9)
    stmt = stmt.values(corporate_id=corp_tbl.columns.id)

    db.execute(stmt)
    db.commit()
Пример #5
0
def _get_ltv_data(ids, release_window, release_count):
    """
    Gets total debt / ev ('ltv') rows for the given corp_tx ids

    params
    ids: corp_tx ids to consider
    release_window (int): max days between earnings release and transaction
    release_count (int): exact number of releases required inside the window

    returns
    rows (list): one 'ltv' value per distinct (cusip_id, trans_dt)
    """
    release_lag = CorpTx.trans_dt - Financial.earnings_release_date
    n_releases = func.count(Financial.id).label('fin_count')

    # corp_txs having exactly release_count releases inside the window
    window_sq = (
        db.query(CorpTx.id)
        .join(Financial, Financial.ticker == CorpTx.company_symbol)
        .filter(CorpTx.id.in_(ids),
                release_lag > 0,
                release_lag <= release_window)
        .group_by(CorpTx.id)
        .having(n_releases == release_count)
        .subquery()
        .alias('window_sq')
    )

    # NOTE(review): `ev` is not defined in this function; presumably a
    # module-level column expression -- confirm
    ltv_col = (Financial.totaldebt / ev).label('ltv')

    same_px_day = and_(CorpTx.company_symbol == EquityPx.ticker,
                       CorpTx.trans_dt == EquityPx.date)
    ltv_query = (
        db.query(ltv_col)
        .select_from(CorpTx)
        .join(window_sq, window_sq.c.id == CorpTx.id)
        .join(EquityPx, same_px_day)
        .join(InterestRate, CorpTx.trans_dt == InterestRate.date)
        .join(Financial, Financial.ticker == CorpTx.company_symbol)
        .filter(release_lag > 0,
                release_lag <= release_window)
        .distinct(CorpTx.cusip_id, CorpTx.trans_dt)
        .order_by(CorpTx.cusip_id,
                  CorpTx.trans_dt.desc(),
                  Financial.earnings_release_date.desc())
    )

    return db.execute(ltv_query).fetchall()
Пример #6
0
def counts_by_sym(ids):
    """
    Counts the given corp_tx rows per company symbol

    params
    ids: corp_tx ids to include

    returns
    rows (list): (company_symbol, count) per symbol
    """
    symbol = CorpTx.company_symbol
    query = db.query(symbol, func.count(symbol))
    query = query.filter(CorpTx.id.in_(ids))
    query = query.group_by(symbol)
    return db.execute(query).fetchall()
Пример #7
0
def get_target_stats(ids):
    """Returns (mean, population std) of close_yld over the given ids"""
    yld = CorpTx.close_yld
    stats_query = db.query(func.avg(yld), func.stddev_pop(yld))
    stats_query = stats_query.filter(CorpTx.id.in_(ids))

    # only the first row of the result is returned
    rows = db.execute(stats_query).fetchall()
    return rows[0]
Пример #8
0
def get_corptx_ids(tickers, release_window, release_count, limit, tick_limit,
                   sd, ed):
    """
    Gets corp_tx ids for the given tickers whose transactions have exactly
    release_count earnings releases within release_window days beforehand.

    params
    tickers: corp_tx company_symbol values to include
    release_window (int): relevant earnings_release_date window in days
    release_count (int): number of earnings releases required in the window
    limit (int): row limit applied to the final query
    tick_limit (int): per-symbol cap; rows are kept while their row number
        within the symbol is <= tick_limit * release_count
    sd: exclusive lower bound on trans_dt
    ed: inclusive upper bound on trans_dt

    returns
    ids (1D np arr): unique matching ids
    """
    release_lag = CorpTx.trans_dt - Financial.earnings_release_date
    n_releases = func.count(Financial.id).label('fin_count')

    # corp_txs with a close_yld in (0, 20] and exactly release_count
    # releases inside the window
    window_sq = (
        db.query(CorpTx.id)
        .distinct(CorpTx.cusip_id, CorpTx.trans_dt)
        .join(Financial, Financial.ticker == CorpTx.company_symbol)
        .filter(CorpTx.company_symbol.in_(tickers),
                CorpTx.close_yld > 0,
                CorpTx.close_yld <= 20.0,
                release_lag <= release_window,
                release_lag > 0)
        .group_by(CorpTx.id)
        .having(n_releases == release_count)
        .subquery('window_sq')
    )

    # number the rows within each company symbol, ordered by corp_tx id
    row_num = func.row_number().over(
        partition_by=CorpTx.company_symbol,
        order_by=CorpTx.id,
    ).label('rn')

    same_px_day = and_(CorpTx.company_symbol == EquityPx.ticker,
                       CorpTx.trans_dt == EquityPx.date)
    numbered_sq = (
        db.query(CorpTx.id, row_num)
        .join(window_sq, CorpTx.id == window_sq.c.id)
        .join(EquityPx, same_px_day)
        .join(InterestRate, CorpTx.trans_dt == InterestRate.date)
        .join(Financial, Financial.ticker == CorpTx.company_symbol)
        .filter(release_lag <= release_window,
                release_lag > 0,
                CorpTx.trans_dt <= ed,
                CorpTx.trans_dt > sd)
        .subquery('sq')
    )

    id_query = (
        db.query(CorpTx.id)
        .distinct(CorpTx.id, CorpTx.trans_dt)
        .join(numbered_sq, numbered_sq.c.id == CorpTx.id)
        .filter(numbered_sq.c.rn <= tick_limit*release_count)
        .order_by(CorpTx.trans_dt.asc())
        .limit(limit)
    )

    id_rows = db.execute(id_query).fetchall()
    return np.unique(np.array(id_rows).flatten())
0
def _get_fwd_credit_tx_data(ids, days_lower, days_upper):
    """
    Gets forward credit transaction rows for the given base corp_tx ids

    Each ctx1 row in ids is paired with ctx2 rows of the same company
    symbol whose close_yld is positive and whose days_fwd lies in
    (days_lower, days_upper]. One row is kept per ctx1 id, ordered by
    ctx2.trans_dt ascending.

    NOTE(review): ctx1, ctx2, days_fwd and FWD_CTX_COLS are not defined in
    this function; presumably module-level aliases/expressions -- confirm.

    params
    ids: base corp_tx ids (matched against ctx1.id)
    days_lower: exclusive lower bound on days_fwd
    days_upper: inclusive upper bound on days_fwd

    returns
    rows (list): queried FWD_CTX_COLS rows
    """
    fwd_query = (
        db.query(*FWD_CTX_COLS)
        .select_from(ctx1)
        .join(ctx2, ctx1.company_symbol == ctx2.company_symbol)
        .join(InterestRate, ctx2.trans_dt == InterestRate.date)
        .filter(ctx1.id.in_(ids),
                ctx2.close_yld > 0,
                days_fwd > days_lower,
                days_fwd <= days_upper)
        .distinct(ctx1.id)
        .order_by(ctx1.id, ctx2.trans_dt.asc())
    )

    return db.execute(fwd_query).fetchall()
Пример #10
0
def build_dataset():
    """
    Prints the non-empty find_sample result for the first
    (company_symbol, trans_dt) combination that has a matching equity price.
    """
    # distinct symbol / transaction date combos with an equity px row
    pairs_query = (
        db.query(CorpTx.company_symbol, CorpTx.trans_dt)
        .filter(CorpTx.company_symbol == EquityPx.ticker)
        .filter(CorpTx.trans_dt == EquityPx.date)
        .group_by(CorpTx.company_symbol, CorpTx.trans_dt)
    )
    pairs = db.execute(pairs_query).fetchall()

    # only the first n combos are sampled
    n = 1
    for symbol, trade_dt in pairs[:n]:
        sample = find_sample(symbol, trade_dt)
        if not sample:
            continue
        print(sample)
Пример #11
0
def get_fwd_credit_tx_ids(base_ids, days_lower, days_upper):
    """
    Gets (base id, forward id) pairs of credit transactions

    Each ctx1 row in base_ids with close_yld in (0, 20] is paired with ctx2
    rows of the same cusip_id whose close_yld is positive and whose days_fwd
    lies in (days_lower, days_upper]. One pair is kept per ctx1 id, ordered
    by ctx2.trans_dt ascending.

    NOTE(review): ctx1, ctx2 and days_fwd are not defined in this function;
    presumably module-level aliases/expressions -- confirm.

    params
    base_ids: base corp_tx ids (matched against ctx1.id)
    days_lower: exclusive lower bound on days_fwd
    days_upper: inclusive upper bound on days_fwd

    returns
    id_pairs (np arr): queried (ctx1.id, ctx2.id) rows
    """
    pair_query = (
        db.query(ctx1.id, ctx2.id)
        .select_from(ctx1)
        .join(ctx2, ctx1.cusip_id == ctx2.cusip_id)
        .join(InterestRate, ctx1.trans_dt == InterestRate.date)
        .filter(ctx1.id.in_(base_ids),
                ctx1.close_yld > 0,
                ctx1.close_yld <= 20,
                ctx2.close_yld > 0,
                days_fwd > days_lower,
                days_fwd <= days_upper)
        .distinct(ctx1.id)
        .order_by(ctx1.id, ctx2.trans_dt.asc())
    )

    pair_rows = db.execute(pair_query).fetchall()
    return np.array(pair_rows)
Пример #12
0
def _get_credit_tx_data(ids):
    """
    Gets credit samples for the given corp_tx ids, joined to the interest
    rate row of the same date

    params
    ids: corp_tx ids of the credits

    returns
    samples (list): queried CTX_COLS rows
    """
    credit_query = (
        db.query(*CTX_COLS)
        .select_from(CorpTx)
        .filter(CorpTx.id.in_(ids))
        .join(InterestRate, CorpTx.trans_dt == InterestRate.date)
    )

    return db.execute(credit_query).fetchall()
Пример #13
0
def _get_financial_data(ids, release_window, release_count, limit):
    """
    Gets financials with equity px and interest rate data for corp_tx ids

    NOTE(review): days_from_release, fin_count and FIN_COLS are not defined
    in this function; presumably module-level expressions -- confirm.

    params
    ids: corp_tx ids of the credits
    release_window (int): relevant financials day window filter
    release_count (int): exact number of releases required inside the window
    limit (int): samples limit

    returns
    samples (list): queried FIN_COLS rows
    """
    # corp_txs having exactly release_count releases inside the window
    window_sq = (
        db.query(CorpTx.id)
        .join(Financial, Financial.ticker == CorpTx.company_symbol)
        .filter(CorpTx.id.in_(ids),
                days_from_release > 0,
                days_from_release <= release_window)
        .group_by(CorpTx.id)
        .having(fin_count == release_count)
        .subquery()
        .alias('window_sq')
    )

    # one row per (cusip_id, trans_dt, earnings_release_date)
    same_px_day = and_(CorpTx.company_symbol == EquityPx.ticker,
                       CorpTx.trans_dt == EquityPx.date)
    fin_query = (
        db.query(*FIN_COLS)
        .select_from(CorpTx)
        .join(window_sq, window_sq.c.id == CorpTx.id)
        .join(EquityPx, same_px_day)
        .join(InterestRate, CorpTx.trans_dt == InterestRate.date)
        .join(Financial, Financial.ticker == CorpTx.company_symbol)
        .filter(days_from_release > 0,
                days_from_release <= release_window)
        .distinct(CorpTx.cusip_id, CorpTx.trans_dt,
                  Financial.earnings_release_date)
        .order_by(CorpTx.cusip_id,
                  CorpTx.trans_dt.desc(),
                  Financial.earnings_release_date.desc())
        .limit(limit)
    )

    return db.execute(fin_query).fetchall()
Пример #14
0
def find_sample(ticker, trans_date, fin_count=8):
    """
    Gets up to fin_count financial releases preceding a transaction date
    for one ticker.

    Rows are restricted to corp_txs of the given ticker on trans_date with
    close_pr <= 100 that have an equity px row for the same ticker and date,
    and to financials of that ticker released within the 720 days before
    the transaction (release date strictly earlier than the transaction).

    params
    ticker (str): corp_tx company_symbol
    trans_date: corp_tx transaction date
    fin_count (int): max number of rows returned

    returns
    samples (list): (ticker, trans_dt, earnings_release_date,
        revenueadjusted) rows, oldest release first
    """
    # look-back window, with buffer for CY changes
    tdelta = timedelta(days=720)

    s = (
        db.query(Financial.ticker, CorpTx.trans_dt,
                 Financial.earnings_release_date,
                 Financial.revenueadjusted)
        # restrict to the requested ticker and date; previously the date
        # filter compared the column with itself and the ticker was ignored
        .filter(and_(CorpTx.company_symbol == ticker,
                     CorpTx.trans_dt == trans_date,
                     CorpTx.close_pr <= 100))
        .filter(EquityPx.date == CorpTx.trans_dt)
        .filter(CorpTx.company_symbol == EquityPx.ticker)
        .filter(Financial.ticker == EquityPx.ticker)
        .filter(Financial.earnings_release_date < CorpTx.trans_dt)
        .filter(Financial.earnings_release_date >= CorpTx.trans_dt-tdelta)
        .group_by(Financial.ticker, CorpTx.trans_dt,
                  Financial.earnings_release_date,
                  Financial.revenueadjusted)
        .order_by(CorpTx.trans_dt.desc(),
                  Financial.earnings_release_date.asc())
        .limit(fin_count)
    )

    return db.execute(s).fetchall()
Пример #15
0
def get_credit_targets(ids):
    """
    Gets close_yld values for the given corp_tx ids

    params
    ids: corp_tx ids of the credits

    returns
    ylds (1D np arr): queried close_yld values
    """
    yld_query = db.query(CorpTx.close_yld).filter(CorpTx.id.in_(ids))
    yld_rows = db.execute(yld_query).fetchall()
    return np.array(yld_rows).flatten()
Пример #16
0
def build_feature_data(day_window=100, sample_count=5, standardize=True):
    """
    Generates dataset consisting of interest rate, financial,
    credit terms, and yield data by transaction. Financial data
    is normalized by EV. When standardize is True every column of the
    frame, close_yld included, is mean-centred and divided by its std.

    params
    day_window (int): max days between earnings release and transaction;
        the release must precede the transaction (0 < days <= day_window)
    sample_count (int): row limit applied to the query
    standardize (bool): standardize all columns across rows when True

    returns:
        - x (np arr): financial, interest rate and days-to-maturity columns
        - y (np arr): close_yld values
        - outcols (list): column names of x, in column order
    """
    # query corporate transactions to generate terms, equity price, and
    # last reported financials within given day range
    s = db.query(CorpTx.company_symbol, CorpTx.trans_dt, CorpTx.mtrty_dt,
                 EquityPx.adj_close, CorpTx.close_yld, Financial,
                 InterestRate.BAMLC0A1CAAASYTW,
                 InterestRate.BAMLC0A2CAASYTW,
                 InterestRate.BAMLC0A3CASYTW,
                 InterestRate.BAMLC0A4CBBBSYTW,
                 InterestRate.BAMLH0A1HYBBSYTW,
                 InterestRate.BAMLH0A2HYBSYTW,
                 InterestRate.BAMLH0A3HYCSYTW,
                 InterestRate.BAMLC1A0C13YSYTW,
                 InterestRate.BAMLC2A0C35YSYTW,
                 InterestRate.BAMLC3A0C57YSYTW,
                 InterestRate.BAMLC4A0C710YSYTW,
                 InterestRate.BAMLC7A0C1015YSYTW,
                 InterestRate.BAMLC8A0C15PYSYTW) \
        .select_from(CorpTx) \
        .join(EquityPx,
              and_(CorpTx.company_symbol == EquityPx.ticker,
                   CorpTx.trans_dt == EquityPx.date)) \
        .join(InterestRate, CorpTx.trans_dt == InterestRate.date) \
        .join(Financial, CorpTx.company_symbol == Financial.ticker) \
        .filter(
            and_(
                CorpTx.trans_dt-Financial.earnings_release_date <= day_window,
                CorpTx.trans_dt-Financial.earnings_release_date > 0)) \
        .order_by(Financial.ticker,
                  CorpTx.trans_dt.desc(),
                  Financial.earnings_release_date.desc()) \
        .distinct(Financial.ticker, CorpTx.trans_dt) \
        .order_by(func.random()) \
        .limit(sample_count)

    samples = db.execute(s).fetchall()
    # convert to df
    # column names: the five leading scalar columns, then every Financial
    # table column, then the interest rate columns
    # NOTE(review): assumes the InterestRate table's column order matches the
    # order the rate columns are selected in above -- confirm
    colnames = [
        'company_symbol', 'trans_dt', 'mtrty_dt', 'adj_close', 'close_yld'
    ]
    colnames += Financial.__table__.columns.keys()
    rate_cols = InterestRate.__table__.columns.keys()
    rate_cols = [c for c in rate_cols if c not in ['id', 'date']]
    # rate_cols is already stripped of 'id'/'date'; the filter below is
    # redundant
    colnames += [c for c in rate_cols if c not in ['id', 'date']]
    df = pd.DataFrame(samples, columns=colnames)

    # calculate days to maturity
    df['days_to_mtrty'] = (df.mtrty_dt - df.trans_dt) / np.timedelta64(1, 'D')

    # drop non-financial cols and fill nans with 0
    NON_FIN_COLS = [
        'company_symbol', 'id', 'ticker', 'entity_id', 'earnings_release_date',
        'filing_date', 'period', 'period_start', 'period_end', 'mtrty_dt',
        'trans_dt'
    ]
    df = df.drop(labels=NON_FIN_COLS, axis=1)
    # rebuild the frame so every remaining column is float64
    df = pd.DataFrame(df.values, columns=df.columns.values,
                      dtype=np.float64).fillna(0)

    # reduce complexity of rating based interest rate cols
    # each column becomes its difference to the next column in the chain;
    # the statement order matters because every subtraction must read the
    # not-yet-modified value of the column it subtracts
    df.loc[:, 'BAMLH0A3HYCSYTW'] -= df.BAMLH0A2HYBSYTW
    df.loc[:, 'BAMLH0A2HYBSYTW'] -= df.BAMLH0A1HYBBSYTW
    df.loc[:, 'BAMLH0A1HYBBSYTW'] -= df.BAMLC0A4CBBBSYTW
    df.loc[:, 'BAMLC0A4CBBBSYTW'] -= df.BAMLC0A3CASYTW
    df.loc[:, 'BAMLC0A3CASYTW'] -= df.BAMLC0A2CAASYTW
    df.loc[:, 'BAMLC0A2CAASYTW'] -= df.BAMLC0A1CAAASYTW

    # reduce complexity of duration based interest rate cols
    # same order-dependent differencing as above
    df.loc[:, 'BAMLC8A0C15PYSYTW'] -= df.BAMLC7A0C1015YSYTW
    df.loc[:, 'BAMLC7A0C1015YSYTW'] -= df.BAMLC4A0C710YSYTW
    df.loc[:, 'BAMLC4A0C710YSYTW'] -= df.BAMLC3A0C57YSYTW
    df.loc[:, 'BAMLC3A0C57YSYTW'] -= df.BAMLC2A0C35YSYTW
    df.loc[:, 'BAMLC2A0C35YSYTW'] -= df.BAMLC1A0C13YSYTW

    # reduce complexity by adding line item complements
    # residual opex
    df['other_opex'] = df.operatingexpenses - df.sgaexpense \
        - df.researchanddevelopment - df.depreciationandamortizationexpense \
        - df.operatingexpenseexitems

    # residual addbacks
    df['other_addbacks'] = df.operatingexpenseexitems - df.restructuring \
        - df.assetimpairment

    # residual investments
    # consolidate afs and sti
    # when both columns hold the same value it is kept once, otherwise the
    # two are summed
    df.shortterminvestments = np.where(
        df.availableforsalesecurities.eq(df.shortterminvestments),
        df.shortterminvestments,
        df.shortterminvestments + df.availableforsalesecurities)

    df['other_investments'] = df.totalinvestments \
        - df.shortterminvestments - df.longterminvestments

    # residual current assets
    df['other_current_assets'] = df.currentassets - df.shortterminvestments \
        - df.cash

    # residual other long-term assets
    df['other_lt_assets'] = df.assets - df.ppe - df.longterminvestments \
        - df.currentassets

    # residual cash flow statement
    df['other_opcf'] = df.operatingcashflow - df.netincome \
        - df.depreciationamortization - df.sharebasedcompensation \
        - df.assetimpairment

    df['other_invcf'] = df.investingcashflow - df.capex \
        - df.acquisitiondivestitures

    # NOTE(review): the statement below ends with a dangling line
    # continuation; it only works because the following line is blank
    df['dividends'] = df.paymentsofdividendscommonstock \
        + df.paymentsofdividendspreferredstock \
        + df.paymentsofdividendsnoncontrollinginterest \

    df['other_fincf'] = df.financingcashflow \
        - df.dividends \
        - df.paymentsforrepurchaseofcommonstock

    # capitalization adjustments:
    # [1] calculate mkt cap and ev
    # [2] normalize each row by ev
    # [3] conditionally standardize each column
    df['mkt_cap'] = df.adj_close * df.sharesoutstandingendofperiod
    df['ev'] = df.mkt_cap + df.totaldebt - df.cash \
        - df.shortterminvestments - df.longterminvestments
    # every column except the rate cols, close_yld and days_to_mtrty is
    # divided by ev (this includes the ev column itself)
    xcol_mask = [
        c for c in df.columns.values
        if c not in rate_cols + ['close_yld', 'days_to_mtrty']
    ]
    df.loc[:, xcol_mask] = df.loc[:, xcol_mask].div(df.ev, axis=0)
    if standardize:
        # applies to the whole frame, so close_yld is standardized as well
        df = (df - df.mean(axis=0)) / df.std(axis=0)

    # drop unnecessary cols
    DROP_COLS = [
        'avgsharesoutstandingbasic', 'avgdilutedsharesoutstanding',
        'commonstockdividendspershare', 'operatingexpenses',
        'operatingexpenseexitems', 'operatingincome', 'ebitda',
        'earningsbeforetaxes', 'netincome', 'totalinvestments',
        'availableforsalesecurities', 'currentassets', 'assets',
        'currentlongtermdebt', 'longtermdebt',
        'lineofcreditfacilityamountoutstanding', 'secureddebt',
        'convertibledebt', 'termloan', 'mortgagedebt', 'unsecureddebt',
        'mediumtermnotes', 'trustpreferredsecurities', 'seniornotes',
        'subordinateddebt', 'operatingcashflow', 'investingcashflow',
        'financingcashflow', 'paymentsofdividends', 'capex', 'ev',
        'stockrepurchasedduringperiodvalue', 'adj_close',
        'stockrepurchasedduringperiodshares', 'incometaxespaid',
        'interestpaidnet', 'sharesoutstandingendofperiod',
        'restrictedcashandinvestmentscurrent',
        'paymentsofdividendspreferredstock', 'paymentsofdividendscommonstock',
        'paymentsofdividendsnoncontrollinginterest', 'assetimpairment',
        'restructuring'
    ]
    # columns that are entirely NaN are dropped as well
    df = df.drop(labels=DROP_COLS, axis=1).dropna(axis=1, how='all')

    # order cols into financials, interest rates, instrument metrics
    outcols = [
        c for c in df.columns.values
        if c not in rate_cols + ['days_to_mtrty', 'close_yld']
    ]
    outcols += rate_cols + ['days_to_mtrty']

    # split into x, y, column names
    # outcols = [c for c in df.columns.values if c != 'close_yld']
    x, y = df[outcols].values, df.close_yld.values
    return x, y, outcols