def update_corporate_fk():
    # corporates matching 1st 6 digits of CUSIP-9 with entity CUSIP-6
    s = select([
        Corporate.id, Entity.id, Corporate.entity_id, Corporate.cusip9,
        Entity.cusip6
    ]).where(Corporate.entity_id.is_(None)).where(
        func.left(Corporate.cusip9, 6) == Entity.cusip6)
    rows = db.execute(s).fetchall()

    # update entity_id for CUSIP-6 matches
    for cid, eid, _, cusip9, cusip6 in rows:
        db.query(Corporate).filter(Corporate.id == cid).update(
            {Corporate.entity_id: eid}, synchronize_session=False)
    db.commit()

    # corporates matching company_symbol with entity ticker
    s = select([
        Corporate.id, Entity.id, Corporate.entity_id,
        Corporate.company_symbol, Entity.ticker
    ]).where(Corporate.entity_id.is_(None)).where(
        Corporate.company_symbol == Entity.ticker)
    rows = db.execute(s).fetchall()

    # update entity_id for ticker matches
    for r in rows:
        db.query(Corporate).filter(Corporate.id == r[0]).update(
            {Corporate.entity_id: r[1]}, synchronize_session=False)
    db.commit()
def update_equity_px_fk():
    # equity pxs matching ticker with entity ticker
    equity_px = Base.metadata.tables['equity_px']
    entity = Base.metadata.tables['entity']
    s = update(equity_px).where(equity_px.columns.entity_id.is_(None)).where(
        equity_px.columns.ticker == entity.columns.ticker).values(
            entity_id=entity.columns.id)
    db.execute(s)
    db.commit()
def update_financial_fk():
    # financials matching ticker with entity ticker
    financial = Base.metadata.tables['financial']
    entity = Base.metadata.tables['entity']
    s = update(financial).where(financial.columns.entity_id.is_(None)).where(
        financial.columns.ticker == entity.columns.ticker).values(
            entity_id=entity.columns.id)
    db.execute(s)
    db.commit()
def update_corp_tx_fk():
    corp_tx = Base.metadata.tables['corp_tx']
    corporate = Base.metadata.tables['corporate']

    # corp_tx cusip_id matches corporate cusip9
    s = update(corp_tx).where(corp_tx.columns.corporate_id.is_(None)).where(
        corp_tx.columns.cusip_id == corporate.columns.cusip9).values(
            corporate_id=corporate.columns.id)
    db.execute(s)
    db.commit()
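# Sketch (not part of the original module): after running the FK update
# helpers above, count how many rows are still missing their foreign keys.
# Reuses the module-level db/Base objects assumed by the functions above.
def count_unmatched_fks():
    equity_px = Base.metadata.tables['equity_px']
    financial = Base.metadata.tables['financial']
    corp_tx = Base.metadata.tables['corp_tx']
    return {
        'corporate': db.query(Corporate)
                       .filter(Corporate.entity_id.is_(None)).count(),
        'equity_px': db.execute(
            select([func.count()]).select_from(equity_px)
            .where(equity_px.c.entity_id.is_(None))).scalar(),
        'financial': db.execute(
            select([func.count()]).select_from(financial)
            .where(financial.c.entity_id.is_(None))).scalar(),
        'corp_tx': db.execute(
            select([func.count()]).select_from(corp_tx)
            .where(corp_tx.c.corporate_id.is_(None))).scalar(),
    }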
def _get_ltv_data(ids, release_window, release_count):
    # subquery corp_txs for release_count financial releases during window
    days_from_release = CorpTx.trans_dt - Financial.earnings_release_date
    fin_count = func.count(Financial.id).label('fin_count')
    window_stmt = db.query(CorpTx.id) \
        .join(Financial, Financial.ticker == CorpTx.company_symbol) \
        .filter(CorpTx.id.in_(ids),
                days_from_release > 0,
                days_from_release <= release_window) \
        .group_by(CorpTx.id) \
        .having(fin_count == release_count) \
        .subquery().alias('window_sq')

    # query financials with equity px and interest rate data
    # NOTE: `ev` is assumed to be a module-level SQL expression for
    # enterprise value
    ltv = (Financial.totaldebt / ev).label('ltv')
    s = db.query(ltv) \
        .select_from(CorpTx) \
        .join(window_stmt, window_stmt.c.id == CorpTx.id) \
        .join(EquityPx, and_(CorpTx.company_symbol == EquityPx.ticker,
                             CorpTx.trans_dt == EquityPx.date)) \
        .join(InterestRate, CorpTx.trans_dt == InterestRate.date) \
        .join(Financial, Financial.ticker == CorpTx.company_symbol) \
        .filter(days_from_release > 0,
                days_from_release <= release_window) \
        .distinct(CorpTx.cusip_id, CorpTx.trans_dt) \
        .order_by(CorpTx.cusip_id,
                  CorpTx.trans_dt.desc(),
                  Financial.earnings_release_date.desc())

    return db.execute(s).fetchall()
def counts_by_sym(ids):
    s = db.query(CorpTx.company_symbol,
                 func.count(CorpTx.company_symbol)) \
        .filter(CorpTx.id.in_(ids)) \
        .group_by(CorpTx.company_symbol)
    results = db.execute(s).fetchall()
    return results
def get_target_stats(ids):
    """Returns mean, std (pop)"""
    s = db.query(func.avg(CorpTx.close_yld),
                 func.stddev_pop(CorpTx.close_yld)) \
        .filter(CorpTx.id.in_(ids))
    return db.execute(s).fetchall()[0]
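# Sketch (not part of the original module): standardize yield targets using
# the population stats from get_target_stats and the raw yields from
# get_credit_targets (defined further below in this module).
def standardize_targets(ids):
    mean, std = get_target_stats(ids)
    ylds = get_credit_targets(ids)
    return (ylds - float(mean)) / float(std)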
def get_corptx_ids(tickers, release_window, release_count, limit,
                   tick_limit, sd, ed):
    """
    Gets up to limit corp_tx ids for tickers with at least release_count
    earnings releases within release_window.

    params
    tickers (list): corp_tx company_symbols
    release_window (int): relevant earnings_release_date window
    release_count (int): min number of earnings releases during release_window
    limit (int): total samples return limit
    tick_limit (int): per-ticker samples limit
    sd (date): start transaction date (exclusive)
    ed (date): end transaction date (inclusive)

    returns
    ids (1D np arr): matching ids
    """
    # subquery corp_txs for release_count financial releases during window
    days_from_release = CorpTx.trans_dt - Financial.earnings_release_date
    fin_count = func.count(Financial.id).label('fin_count')
    window_stmt = db.query(CorpTx.id) \
        .distinct(CorpTx.cusip_id, CorpTx.trans_dt) \
        .join(Financial, Financial.ticker == CorpTx.company_symbol) \
        .filter(CorpTx.company_symbol.in_(tickers),
                CorpTx.close_yld > 0,
                CorpTx.close_yld <= 20.0,
                days_from_release <= release_window,
                days_from_release > 0) \
        .group_by(CorpTx.id) \
        .having(fin_count == release_count) \
        .subquery('window_sq')

    # partition by row number
    rn = func.row_number() \
        .over(partition_by=CorpTx.company_symbol,
              order_by=CorpTx.id).label('rn')
    sq = db.query(CorpTx.id, rn) \
        .join(window_stmt, CorpTx.id == window_stmt.c.id) \
        .join(EquityPx, and_(CorpTx.company_symbol == EquityPx.ticker,
                             CorpTx.trans_dt == EquityPx.date)) \
        .join(InterestRate, CorpTx.trans_dt == InterestRate.date) \
        .join(Financial, Financial.ticker == CorpTx.company_symbol) \
        .filter(days_from_release <= release_window,
                days_from_release > 0,
                CorpTx.trans_dt <= ed,
                CorpTx.trans_dt > sd) \
        .subquery('sq')

    s = db.query(CorpTx.id) \
        .distinct(CorpTx.id, CorpTx.trans_dt) \
        .join(sq, sq.c.id == CorpTx.id) \
        .filter(sq.c.rn <= tick_limit*release_count) \
        .order_by(CorpTx.trans_dt.asc()) \
        .limit(limit)

    ids = db.execute(s).fetchall()
    return np.unique(np.array(ids).flatten())
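# Usage sketch (not part of the original module): pull ids for a placeholder
# ticker universe and date range, then inspect per-symbol counts and yield
# targets. All arguments below are hypothetical.
def example_build_id_universe():
    ids = get_corptx_ids(tickers=['AAA', 'BBB'],
                         release_window=120,
                         release_count=4,
                         limit=50000,
                         tick_limit=100,
                         sd='2015-01-01',
                         ed='2019-12-31')
    print(counts_by_sym(ids))
    return ids, get_credit_targets(ids)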
def _get_fwd_credit_tx_data(ids, days_lower, days_upper):
    # ctx1/ctx2 are assumed to be module-level aliases of CorpTx and
    # days_fwd the ctx2.trans_dt - ctx1.trans_dt expression
    s = db.query(*FWD_CTX_COLS) \
        .select_from(ctx1) \
        .join(ctx2, ctx1.company_symbol == ctx2.company_symbol) \
        .join(InterestRate, ctx2.trans_dt == InterestRate.date) \
        .filter(ctx1.id.in_(ids),
                ctx2.close_yld > 0,
                days_fwd > days_lower,
                days_fwd <= days_upper) \
        .distinct(ctx1.id) \
        .order_by(ctx1.id, ctx2.trans_dt.asc())
    return db.execute(s).fetchall()
def build_dataset():
    # list of tickers and transaction dates
    s = db.query(CorpTx.company_symbol, CorpTx.trans_dt) \
        .filter(CorpTx.company_symbol == EquityPx.ticker) \
        .filter(CorpTx.trans_dt == EquityPx.date) \
        .group_by(CorpTx.company_symbol, CorpTx.trans_dt)
    ticks_and_dts = db.execute(s).fetchall()

    # find sample for n combos
    n = 1
    ticks_and_dts = ticks_and_dts[:n]
    for tick, dt in ticks_and_dts:
        fins = find_sample(tick, dt)
        if len(fins) > 0:
            print(fins)
def get_fwd_credit_tx_ids(base_ids, days_lower, days_upper):
    s = db.query(ctx1.id, ctx2.id) \
        .select_from(ctx1) \
        .join(ctx2, ctx1.cusip_id == ctx2.cusip_id) \
        .join(InterestRate, ctx1.trans_dt == InterestRate.date) \
        .filter(ctx1.id.in_(base_ids),
                ctx1.close_yld > 0,
                ctx1.close_yld <= 20,
                ctx2.close_yld > 0,
                days_fwd > days_lower,
                days_fwd <= days_upper) \
        .distinct(ctx1.id) \
        .order_by(ctx1.id, ctx2.trans_dt.asc())
    id_pairs = db.execute(s).fetchall()
    return np.array(id_pairs)
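# Usage sketch (not part of the original module): pair base transactions with
# forward transactions of the same cusip 20-40 days out, then pull the forward
# feature rows for the matched base ids. The window bounds are placeholders.
def example_fwd_pairs(base_ids, days_lower=20, days_upper=40):
    id_pairs = get_fwd_credit_tx_ids(base_ids, days_lower, days_upper)
    fwd_rows = _get_fwd_credit_tx_data(id_pairs[:, 0].tolist(),
                                       days_lower, days_upper)
    return id_pairs, fwd_rows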
def _get_credit_tx_data(ids):
    """
    Gets credit samples for given corp_tx ids

    params
    ids (list): corp_tx ids of credits

    returns
    samples (list): queried samples
    """
    s = db.query(*CTX_COLS) \
        .select_from(CorpTx) \
        .join(InterestRate, CorpTx.trans_dt == InterestRate.date) \
        .filter(CorpTx.id.in_(ids))
    return db.execute(s).fetchall()
def _get_financial_data(ids, release_window, release_count, limit):
    """
    Gets financials and equity px time series for corp_tx ids

    params
    ids (list): corp_tx ids of credits
    release_window (int): relevant financials day window filter
    release_count (int): min number of earnings releases during release_window
    limit (int): samples limit

    returns
    samples (list): queried samples
    """
    # subquery corp_txs for release_count financial releases during window
    days_from_release = CorpTx.trans_dt - Financial.earnings_release_date
    fin_count = func.count(Financial.id).label('fin_count')
    window_stmt = db.query(CorpTx.id) \
        .join(Financial, Financial.ticker == CorpTx.company_symbol) \
        .filter(CorpTx.id.in_(ids),
                days_from_release > 0,
                days_from_release <= release_window) \
        .group_by(CorpTx.id) \
        .having(fin_count == release_count) \
        .subquery().alias('window_sq')

    # query financials with equity px and interest rate data
    s = db.query(*FIN_COLS) \
        .select_from(CorpTx) \
        .join(window_stmt, window_stmt.c.id == CorpTx.id) \
        .join(EquityPx, and_(CorpTx.company_symbol == EquityPx.ticker,
                             CorpTx.trans_dt == EquityPx.date)) \
        .join(InterestRate, CorpTx.trans_dt == InterestRate.date) \
        .join(Financial, Financial.ticker == CorpTx.company_symbol) \
        .filter(days_from_release > 0,
                days_from_release <= release_window) \
        .distinct(CorpTx.cusip_id,
                  CorpTx.trans_dt,
                  Financial.earnings_release_date) \
        .order_by(CorpTx.cusip_id,
                  CorpTx.trans_dt.desc(),
                  Financial.earnings_release_date.desc()) \
        .limit(limit)

    return db.execute(s).fetchall()
def find_sample(ticker, trans_date, fin_count=8):
    # calculate minimum date with buffer for CY changes
    tdelta = timedelta(days=720)
    # min_dt = (trans_date-timedelta(days=days)).strftime('%Y-%m-%d')
    s = db.query(Financial.ticker,
                 CorpTx.trans_dt,
                 Financial.earnings_release_date,
                 Financial.revenueadjusted) \
        .filter(and_(CorpTx.trans_dt == trans_date,
                     CorpTx.close_pr <= 100)) \
        .filter(CorpTx.company_symbol == ticker) \
        .filter(EquityPx.date == CorpTx.trans_dt) \
        .filter(CorpTx.company_symbol == EquityPx.ticker) \
        .filter(Financial.ticker == EquityPx.ticker) \
        .filter(Financial.earnings_release_date < CorpTx.trans_dt) \
        .filter(Financial.earnings_release_date >= CorpTx.trans_dt-tdelta) \
        .group_by(Financial.ticker,
                  CorpTx.trans_dt,
                  Financial.earnings_release_date,
                  Financial.revenueadjusted) \
        .order_by(CorpTx.trans_dt.desc(),
                  Financial.earnings_release_date.asc()) \
        .limit(fin_count)
    return db.execute(s).fetchall()
def get_credit_targets(ids):
    s = db.query(CorpTx.close_yld) \
        .filter(CorpTx.id.in_(ids))
    ylds = db.execute(s).fetchall()
    return np.array(ylds).flatten()
def build_feature_data(day_window=100, sample_count=5, standardize=True):
    """
    Generates dataset consisting of interest rate, financial, credit terms,
    and yield data by transaction. Financial data is normalized by EV. All
    features are standardized across time.

    returns:
        - X (np arr): interest rate, financial, credit terms
        - Y (np arr): yield to worst
        - outcols (list): feature column names
    """
    # query corporate transactions to generate terms, equity price, and
    # last reported financials within given day range
    s = db.query(CorpTx.company_symbol, CorpTx.trans_dt, CorpTx.mtrty_dt,
                 EquityPx.adj_close, CorpTx.close_yld, Financial,
                 InterestRate.BAMLC0A1CAAASYTW,
                 InterestRate.BAMLC0A2CAASYTW,
                 InterestRate.BAMLC0A3CASYTW,
                 InterestRate.BAMLC0A4CBBBSYTW,
                 InterestRate.BAMLH0A1HYBBSYTW,
                 InterestRate.BAMLH0A2HYBSYTW,
                 InterestRate.BAMLH0A3HYCSYTW,
                 InterestRate.BAMLC1A0C13YSYTW,
                 InterestRate.BAMLC2A0C35YSYTW,
                 InterestRate.BAMLC3A0C57YSYTW,
                 InterestRate.BAMLC4A0C710YSYTW,
                 InterestRate.BAMLC7A0C1015YSYTW,
                 InterestRate.BAMLC8A0C15PYSYTW) \
        .select_from(CorpTx) \
        .join(EquityPx, and_(CorpTx.company_symbol == EquityPx.ticker,
                             CorpTx.trans_dt == EquityPx.date)) \
        .join(InterestRate, CorpTx.trans_dt == InterestRate.date) \
        .join(Financial, CorpTx.company_symbol == Financial.ticker) \
        .filter(and_(
            CorpTx.trans_dt-Financial.earnings_release_date <= day_window,
            CorpTx.trans_dt-Financial.earnings_release_date > 0)) \
        .order_by(Financial.ticker,
                  CorpTx.trans_dt.desc(),
                  Financial.earnings_release_date.desc()) \
        .distinct(Financial.ticker, CorpTx.trans_dt) \
        .order_by(func.random()) \
        .limit(sample_count)
    samples = db.execute(s).fetchall()

    # convert to df
    colnames = [
        'company_symbol', 'trans_dt', 'mtrty_dt', 'adj_close', 'close_yld'
    ]
    colnames += Financial.__table__.columns.keys()
    rate_cols = InterestRate.__table__.columns.keys()
    rate_cols = [c for c in rate_cols if c not in ['id', 'date']]
    colnames += rate_cols
    df = pd.DataFrame(samples, columns=colnames)

    # calculate days to maturity
    df['days_to_mtrty'] = (df.mtrty_dt - df.trans_dt) / np.timedelta64(1, 'D')

    # drop non-financial cols and fill nans with 0
    NON_FIN_COLS = [
        'company_symbol', 'id', 'ticker', 'entity_id',
        'earnings_release_date', 'filing_date', 'period', 'period_start',
        'period_end', 'mtrty_dt', 'trans_dt'
    ]
    df = df.drop(labels=NON_FIN_COLS, axis=1)
    df = pd.DataFrame(df.values, columns=df.columns.values,
                      dtype=np.float64).fillna(0)

    # reduce complexity of rating based interest rate cols
    df.loc[:, 'BAMLH0A3HYCSYTW'] -= df.BAMLH0A2HYBSYTW
    df.loc[:, 'BAMLH0A2HYBSYTW'] -= df.BAMLH0A1HYBBSYTW
    df.loc[:, 'BAMLH0A1HYBBSYTW'] -= df.BAMLC0A4CBBBSYTW
    df.loc[:, 'BAMLC0A4CBBBSYTW'] -= df.BAMLC0A3CASYTW
    df.loc[:, 'BAMLC0A3CASYTW'] -= df.BAMLC0A2CAASYTW
    df.loc[:, 'BAMLC0A2CAASYTW'] -= df.BAMLC0A1CAAASYTW

    # reduce complexity of duration based interest rate cols
    df.loc[:, 'BAMLC8A0C15PYSYTW'] -= df.BAMLC7A0C1015YSYTW
    df.loc[:, 'BAMLC7A0C1015YSYTW'] -= df.BAMLC4A0C710YSYTW
    df.loc[:, 'BAMLC4A0C710YSYTW'] -= df.BAMLC3A0C57YSYTW
    df.loc[:, 'BAMLC3A0C57YSYTW'] -= df.BAMLC2A0C35YSYTW
    df.loc[:, 'BAMLC2A0C35YSYTW'] -= df.BAMLC1A0C13YSYTW

    # reduce complexity by adding line item complements
    # residual opex
    df['other_opex'] = df.operatingexpenses - df.sgaexpense \
        - df.researchanddevelopment - df.depreciationandamortizationexpense \
        - df.operatingexpenseexitems

    # residual addbacks
    df['other_addbacks'] = df.operatingexpenseexitems - df.restructuring \
        - df.assetimpairment

    # residual investments
    # consolidate afs and sti
    df.shortterminvestments = np.where(
        df.availableforsalesecurities.eq(df.shortterminvestments),
        df.shortterminvestments,
        df.shortterminvestments + df.availableforsalesecurities)
    df['other_investments'] = df.totalinvestments \
        - df.shortterminvestments - df.longterminvestments

    # residual current assets
    df['other_current_assets'] = df.currentassets - df.shortterminvestments \
        - df.cash

    # residual other long-term assets
    df['other_lt_assets'] = df.assets - df.ppe - df.longterminvestments \
        - df.currentassets

    # residual cash flow statement
    df['other_opcf'] = df.operatingcashflow - df.netincome \
        - df.depreciationamortization - df.sharebasedcompensation \
        - df.assetimpairment
    df['other_invcf'] = df.investingcashflow - df.capex \
        - df.acquisitiondivestitures
    df['dividends'] = df.paymentsofdividendscommonstock \
        + df.paymentsofdividendspreferredstock \
        + df.paymentsofdividendsnoncontrollinginterest
    df['other_fincf'] = df.financingcashflow \
        - df.dividends \
        - df.paymentsforrepurchaseofcommonstock

    # capitalization adjustments:
    # [1] calculate mkt cap and ev
    # [2] normalize each row by ev
    # [3] conditionally standardize each column
    df['mkt_cap'] = df.adj_close * df.sharesoutstandingendofperiod
    df['ev'] = df.mkt_cap + df.totaldebt - df.cash \
        - df.shortterminvestments - df.longterminvestments
    xcol_mask = [
        c for c in df.columns.values
        if c not in rate_cols + ['close_yld', 'days_to_mtrty']
    ]
    df.loc[:, xcol_mask] = df.loc[:, xcol_mask].div(df.ev, axis=0)
    if standardize:
        df = (df - df.mean(axis=0)) / df.std(axis=0)

    # drop unnecessary cols
    DROP_COLS = [
        'avgsharesoutstandingbasic', 'avgdilutedsharesoutstanding',
        'commonstockdividendspershare', 'operatingexpenses',
        'operatingexpenseexitems', 'operatingincome', 'ebitda',
        'earningsbeforetaxes', 'netincome', 'totalinvestments',
        'availableforsalesecurities', 'currentassets', 'assets',
        'currentlongtermdebt', 'longtermdebt',
        'lineofcreditfacilityamountoutstanding', 'secureddebt',
        'convertibledebt', 'termloan', 'mortgagedebt', 'unsecureddebt',
        'mediumtermnotes', 'trustpreferredsecurities', 'seniornotes',
        'subordinateddebt', 'operatingcashflow', 'investingcashflow',
        'financingcashflow', 'paymentsofdividends', 'capex', 'ev',
        'stockrepurchasedduringperiodvalue', 'adj_close',
        'stockrepurchasedduringperiodshares', 'incometaxespaid',
        'interestpaidnet', 'sharesoutstandingendofperiod',
        'restrictedcashandinvestmentscurrent',
        'paymentsofdividendspreferredstock',
        'paymentsofdividendscommonstock',
        'paymentsofdividendsnoncontrollinginterest', 'assetimpairment',
        'restructuring'
    ]
    df = df.drop(labels=DROP_COLS, axis=1).dropna(axis=1, how='all')

    # order cols into financials, interest rates, instrument metrics
    outcols = [
        c for c in df.columns.values
        if c not in rate_cols + ['days_to_mtrty', 'close_yld']
    ]
    outcols += rate_cols + ['days_to_mtrty']

    # split into x, y, column names
    # outcols = [c for c in df.columns.values if c != 'close_yld']
    x, y = df[outcols].values, df.close_yld.values
    return x, y, outcols
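# Usage sketch (not part of the original module): build a small feature
# matrix and inspect its shape; sample_count below is a placeholder.
def example_feature_matrix():
    x, y, cols = build_feature_data(day_window=100, sample_count=500,
                                    standardize=True)
    print(x.shape, y.shape, len(cols))
    return x, y, cols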