def load_industries(pickle_from=None, pickle_to=None):
    """Load industry portfolio returns from Ken French's website.

    Downloads each of the 5/10/12/17/30/38/48-industry portfolio return
    datasets from the Fama-French data library (via pandas-datareader)
    and converts their PeriodIndexes to end-of-period timestamps.

    Parameters
    ==========
    pickle_from : str, optional
        Currently unused; reserved for loading cached data.
    pickle_to : str, optional
        Currently unused; reserved for caching downloaded data.

    Returns
    =======
    industries : dict of DataFrames
        Keyed by portfolio count (5, 10, 12, 17, 30, 38, 48); each value
        is that grouping's return table.

    See also
    ========
    # from pandas_datareader.famafrench import get_available_datasets
    # get_available_datasets()
    """
    counts = [5, 10, 12, 17, 30, 38, 48]
    industries = {}
    for n in counts:
        # Element [0] of the famafrench result dict is the first
        # (monthly value-weighted) returns table.
        ret = dr('%s_Industry_Portfolios' % n, 'famafrench', start=DSTART)[0]
        industries[n] = ret.to_timestamp(how='end')
    return industries
def load_rf(freq='M', pickle_from=None, pickle_to=None):
    """Build a risk-free rate return series using 3-month US T-bill yields.

    The 3-Month Treasury Bill: Secondary Market Rate from the Federal
    Reserve (a yield) is converted to a total return.  See 'Methodology'
    for details.

    The time series should closely mimic returns of the BofA Merrill
    Lynch US Treasury Bill (3M) (Local Total Return) index.

    Parameters
    ==========
    freq : str, default 'M'
        Frequency of the resulting return series; a single character
        from {'D', 'W', 'M', 'Q', 'A'}, case-insensitive.
    pickle_from : str, optional
        Currently unused; reserved for loading cached data.
    pickle_to : str, optional
        Currently unused; reserved for caching downloaded data.

    Returns
    =======
    res
        Total-return series named 'RF' at frequency `freq`, as produced
        by `returns.prep`.

    Raises
    ======
    ValueError
        If `freq` is not a single character from 'DWMQA'.

    Methodology
    ===========
    The Federal Reserve publishes a daily chart of Selected Interest
    Rates (release H.15; www.federalreserve.gov/releases/h15/).  As with
    a yield curve, some yields are interpolated from recent issues
    because Treasury auctions do not occur daily.

    While the de-annualized ex-ante yield itself is a fairly good
    tracker of the day's total return, it is not perfect and can exhibit
    non-negligible error in periods of volatile short rates.  The
    purpose of this function is to convert yields to total returns for
    3-month T-bills.  It is a straightforward process given that these
    are discount (zero-coupon) securities.  It consists of buying a
    3-month bond at the beginning of each month, then amortizing that
    bond throughout the month to back into the price of a <3-month
    tenor bond.

    The source data (pulled from fred.stlouisfed.org) is quoted on a
    discount basis.  (See footnote 4 from release H.15.)  This is
    converted to a bond-equivalent yield (BEY) and then translated to a
    hypothetical daily total return.

    The process largely follows Morningstar's published Return
    Calculation of U.S. Treasury Constant Maturity Indices:

    - At the beginning of each month a bill is purchased at the prior
      month-end price, and daily returns in the month reflect the
      change in daily valuation of this bill.
    - If t is not a business day, its yield is the yield of the prior
      business day.
    - At each day during the month, the price of a 3-month bill
      purchased on the final calendar day of the previous month is
      computed.
    - Month-end pricing is unique.  At each month-end date, there are
      effectively two bonds and two prices.  The first is the bond
      hypothetically purchased on the final day of the prior month with
      2m remaining to maturity, and the second is a new-issue bond
      purchased that day with 3m to maturity.  The former is used as
      the numerator to compute that day's total return, while the
      latter is used as the denominator to compute the next day's
      (1st day of next month) total return.

    See also
    ========
    FRED: 3-Month Treasury Bill: Secondary Market Rate (DTB3)
    https://fred.stlouisfed.org/series/DTB3

    McGraw-Hill/Irwin, Interest Rates, 2008.
    https://people.ucsc.edu/~lbaum/econ80h/LS-Chap009.pdf

    Morningstar, Return Calculation of U.S. Treasury Constant Maturity
    Indices, September 2008.
    """
    # Validate `freq` param.  Only a single-character frequency is
    # supported; anything else raises.
    freqs = list('DWMQA')
    freq = freq.upper()
    if freq not in freqs:
        raise ValueError('`freq` must be either a single element or subset'
                         ' from %s, case-insensitive' % freqs)

    # Load daily 3-Month Treasury Bill: Secondary Market Rate.
    # Note that this is on discount basis and will be converted to BEY.
    # Periodicity is daily; scale from percent to decimal.
    rates = dr('DTB3', 'fred', DSTART) * 0.01
    rates = (rates.asfreq('D', method='ffill')
             .fillna(method='ffill')
             .squeeze())

    # Algebra doesn't 'work' on DateOffsets, don't simplify here!
    # `trigger` marks month-end dates, where the "two bonds, two
    # prices" convention from the docstring applies.
    trigger = rates.index.is_month_end
    dtm_old = rates.index + offsets.MonthEnd(-1) + offsets.MonthEnd(3) \
        - rates.index
    dtm_new = rates.index.where(trigger, rates.index + offsets.MonthEnd(-1)) \
        + offsets.MonthEnd(3) - rates.index

    # This does 2 things in one step:
    # (1) convert discount yield to BEY
    # (2) get the price at that BEY and days to maturity
    # The two equations are simplified
    # See https://people.ucsc.edu/~lbaum/econ80h/LS-Chap009.pdf
    p_old = (100 / 360) * (360 - rates * dtm_old.days)
    p_new = (100 / 360) * (360 - rates * dtm_new.days)

    res = p_old.pct_change().where(trigger, p_new.pct_change())
    res = returns.prep(res, in_format='dec', name='RF', freq='D')

    if freq != 'D':
        # BUG FIX: the original called `dr.rollup(out_freq=freq)`, but
        # `dr` is the pandas-datareader DataReader function and has no
        # `rollup` attribute.  The rollup belongs to the daily series
        # `res` built above.
        res = returns.prep(res.rollup(out_freq=freq), in_format='dec',
                           freq=freq)
    return res
def load_rates(freq='D', pickle_from=None, pickle_to=None):
    """Load interest rates from https://fred.stlouisfed.org/.

    Pulls nominal Treasury yields, TIPS yields, financial and
    non-financial commercial paper rates, and short rates (fed funds,
    prime, primary credit) at the requested periodicity and labels the
    columns with a two-level (group, tenor/name) MultiIndex.

    Parameters
    ==========
    freq : str {'D', 'W', 'M'}, default 'D'
        Frequency of time series; daily, weekly, or monthly.
    pickle_from : str, optional
        Currently unused; reserved for loading cached data.
    pickle_to : str, optional
        Currently unused; reserved for caching downloaded data.

    Returns
    =======
    rates : DataFrame
        One column per rate series, MultiIndexed by group and tenor.

    Original source
    ===============
    Board of Governors of the Federal Reserve System
    H.15 Selected Interest Rates
    https://www.federalreserve.gov/releases/h15/
    """
    months = [1, 3, 6]
    years = [1, 2, 3, 5, 7, 10, 20, 30]

    # Nested dictionaries of symbols from fred.stlouisfed.org, keyed by
    # frequency.  Nominal Treasury constant-maturity yields:
    nom = {
        'D': ['DGS%sMO' % m for m in months] + ['DGS%s' % y for y in years],
        'W': ['WGS%sMO' % m for m in months] + ['WGS%sYR' % y for y in years],
        'M': ['GS%sM' % m for m in months] + ['GS%s' % y for y in years]
    }
    # TIPS yields, 5y/7y/10y/20y tenors only.
    tips = {
        'D': ['DFII%s' % y for y in years[3:7]],
        'W': ['WFII%s' % y for y in years[3:7]],
        'M': ['FII%s' % y for y in years[3:7]]
    }
    # Financial commercial paper, 1-3 month.
    fcp = {
        'D': ['DCPF1M', 'DCPF2M', 'DCPF3M'],
        'W': ['WCPF1M', 'WCPF2M', 'WCPF3M'],
        'M': ['CPF1M', 'CPF2M', 'CPF3M']
    }
    # Non-financial commercial paper, 1-3 month.
    # NOTE(review): 'DCPN30' breaks the naming pattern of the other
    # entries ('DCPN1M' would be expected alongside DCPN2M/DCPN3M) --
    # confirm the intended FRED series code before changing it.
    nfcp = {
        'D': ['DCPN30', 'DCPN2M', 'DCPN3M'],
        'W': ['WCPN1M', 'WCPN2M', 'WCPN3M'],
        'M': ['CPN1M', 'CPN2M', 'CPN3M']
    }
    # Short rates: fed funds, bank prime, discount-window primary credit.
    short = {
        'D': ['DFF', 'DPRIME', 'DPCREDIT'],
        'W': ['FF', 'WPRIME', 'WPCREDIT'],
        'M': ['FEDFUNDS', 'MPRIME', 'MPCREDIT']
    }

    groups = [nom, tips, fcp, nfcp, short]
    symbols = list(itertools.chain.from_iterable(d[freq] for d in groups))
    rates = dr(symbols, 'fred', start=DSTART)

    # Column labels, in the same order as `groups` above:
    # 11 nominal (3 month + 8 year tenors), 4 TIPS, 3 + 3 CP, 3 short.
    l1 = ['Nominal'] * 11 + ['TIPS'] * 4 + ['Fncl CP'] * 3 \
        + ['Non-Fncl CP'] * 3 + ['Short Rates'] * 3
    l2 = ['%sm' % m for m in months] + ['%sy' % y for y in years] \
        + ['%sy' % y for y in years[3:7]] \
        + 2 * ['%sm' % m for m in range(1, 4)] \
        + ['Fed Funds', 'Prime Rate', 'Primary Credit']
    rates.columns = pd.MultiIndex.from_arrays([l1, l2])
    return rates
def load_factors(pickle_from=None, pickle_to=None):
    """Load risk factor returns.

    Concatenates monthly factor return series from several public
    sources into a single DataFrame (columns are factor symbols,
    values are percent returns rounded to 2 decimals).

    Parameters
    ==========
    pickle_from : str, optional
        Currently unused; reserved for loading cached data.
    pickle_to : str, optional
        Currently unused; reserved for caching downloaded data.

    Factors
    =======
    Symbol  Description                                 Source
    ------  -----------                                 ------
    MKT                                                 French
    SMB     Size (small minus big)                      French
    HML     Value (high minus low)                      French
    RMW     Profitability (robust minus weak)           French
    CMA     Investment (conservative minus aggressive)  French
    UMD     Momentum (up minus down)                    French
    STR     Short-term reversal                         French
    LTR     Long-term reversal                          French
    BETA    Beta                                        French
    ACC     Accruals                                    French
    VAR     Variance                                    French
    IVAR    Residual variance                           French
    EP      Earnings-to-price                           French
    CP      Cash flow-to-price                          French
    DP      Dividend-to-price                           French
    BAB     Betting against beta                        AQR
    QMJ     Quality minus junk                          AQR
    HMLD    Value (high minus low) [modified version]   AQR
    LIQ     Liquidity                                   Pastor
    BDLB    Bond lookback straddle                      Hsieh
    FXLB    Curency lookback straddle                   Hsieh
    CMLB    Commodity lookback straddle                 Hsieh
    IRLB    Interest rate lookback straddle             Hsieh
    STLB    Stock lookback straddle                     Hsieh
    PUT     CBOE S&P 500 PutWrite Index                 CBOE
    BXM     CBOE S&P 500 BuyWrite Index®                CBOE
    RXM     CBOE S&P 500 Risk Reversal Index            CBOE

    NOTE(review): the E/P, CF/P, D/P series are added under their
    source labels 'E-P', 'CF-P', 'D-P' and never renamed to the
    EP/CP/DP symbols listed above -- verify against downstream users.

    Source Directory
    ================
    Source  Link
    ------  ----
    French  http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
    Pastor  http://faculty.chicagobooth.edu/lubos.pastor/research/liq_data_1962_2016.txt
    AQR     https://www.aqr.com/library/data-sets
    Hsieh   https://faculty.fuqua.duke.edu/~dah7/HFData.htm
    Fed     https://fred.stlouisfed.org/
    CBOE    http://www.cboe.com/products/strategy-benchmark-indexes
    """
    # TODO: factors elegible for addition
    # VIIX, VIIZ, XIV, ZIV, CRP (AQR)
    # http://www.cboe.com/micro/buywrite/monthendpricehistory.xls ends 2016
    # could use:
    # http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/putdailyprice.csv

    # Warning: slow, kludgy data retrieval follows
    # ------------------------------------------------------------------------

    # `tgt` will become a list of DataFrames and eventually concatenated
    tgt = []

    # MKT, SMB, HML, RMW, CMA, RF, UMD, STR, LTR
    # Element [0] of each famafrench result dict is the monthly table.
    facs = [
        'F-F_Research_Data_5_Factors_2x3',
        'F-F_Momentum_Factor',
        'F-F_ST_Reversal_Factor',
        'F-F_LT_Reversal_Factor'
    ]
    for fac in facs:
        tgt.append(dr(fac, 'famafrench', DSTART)[0])

    # BETA, ACC, VAR, IVAR require some manipulation to compute returns
    # in the dual-sort method of Fama-French.  The 25 portfolios are
    # laid out as 5 size rows x 5 factor columns; columns [0,5,...,20]
    # are the lowest factor quintile within each size bucket and
    # [4,9,...,24] the highest, so the spread below is low-minus-high
    # averaged across sizes.
    for i in ['BETA', 'AC', 'VAR', 'RESVAR']:
        ser = dr('25_Portfolios_ME_' + i + '_5x5', 'famafrench', DSTART)[0]
        ser = (ser.iloc[:, [0, 5, 10, 15, 20]].mean(axis=1)
               - ser.iloc[:, [4, 9, 14, 19, 24]].mean(axis=1))
        ser = ser.rename(i)
        tgt.append(ser)

    # E/P, CF/P, D/P (univariate sorts, quintile spreads)
    for i in ['E-P', 'CF-P', 'D-P']:
        ser = dr('Portfolios_Formed_on_' + i, 'famafrench', DSTART)[0]
        ser = ser.loc[:, 'Hi 20'] - ser.loc[:, 'Lo 20']
        ser = ser.rename(i)
        tgt.append(ser)

    # French data arrive with PeriodIndexes; align to month-end dates.
    tgt = [df.to_timestamp(how='end') for df in tgt]

    # BAB, QMJ, HMLD
    # TODO: performance is poor here, runtime is eaten up by these 3
    links = {
        'BAB': 'http://bit.ly/2hWyaG8',
        'QMJ': 'http://bit.ly/2hUBSgF',
        'HMLD': 'http://bit.ly/2hdVb7G'
    }
    # AQR sheets are decimal returns; * 100 puts them in percent like
    # the French data.
    for key, value in links.items():
        ser = read_excel(value, header=18, index_col=0)['USA'] * 100
        ser = ser.rename(key)
        tgt.append(ser)

    # Lookback straddles (Hsieh trend-following factors).
    # Index arrives as YYYYMM integers; append '01' and roll to
    # month-end to get real dates.
    link = 'http://faculty.fuqua.duke.edu/~dah7/DataLibrary/TF-Fac.xls'
    straddles = read_excel(link, header=14, index_col=0)
    straddles.index = (pd.DatetimeIndex(straddles.index.astype(str) + '01')
                       + offsets.MonthEnd(1))
    straddles = straddles * 100.
    tgt.append(straddles)

    # LIQ (Pastor-Stambaugh liquidity).  -99 is the source's missing
    # marker; replace with NaN before scaling to percent.
    link = 'http://bit.ly/2pn2oBK'
    liq = read_csv(link, skiprows=14, delim_whitespace=True, header=None,
                   usecols=[0, 3], index_col=0, names=['date', 'LIQ'])
    liq.index = (pd.DatetimeIndex(liq.index.astype(str) + '01')
                 + offsets.MonthEnd(1))
    liq = liq.replace(-99, np.nan) * 100.
    tgt.append(liq)

    # USD, HY
    # Forward-fill daily levels, sample at month-end, then convert:
    # USD index level -> percent change, HY spread -> first difference.
    fred = dr(['DTWEXB', 'BAMLH0A0HYM2'], 'fred', DSTART)  # daily default
    fred = (fred.asfreq('D', method='ffill')
            .fillna(method='ffill')
            .asfreq('M'))
    fred.loc[:, 'DTWEXB'] = fred['DTWEXB'].pct_change() * 100.
    fred.loc[:, 'BAMLH0A0HYM2'] = fred['BAMLH0A0HYM2'].diff()
    tgt.append(fred)

    # PUT, BXM, RXM (CBOE options strategy indices)
    # Each index comes as price levels split across an archive file and
    # a current file; concatenate, forward-fill to daily, sample at
    # month-end, and convert to percent returns.
    link1 = 'http://www.cboe.com/micro/put/put_86-06.xls'
    link2 = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/putdailyprice.csv'
    put1 = (read_excel(link1, index_col=0, skiprows=6,
                       header=None).rename_axis('DATE'))
    put2 = read_csv(link2, index_col=0, parse_dates=True, skiprows=7,
                    header=None).rename_axis('DATE')
    put = (pd.concat((put1, put2)).rename(columns={
        1: 'PUT'
    }).iloc[:, 0].asfreq(
        'D', method='ffill').fillna(method='ffill').asfreq('M').pct_change()
        * 100.)
    tgt.append(put)

    link1 = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/bxmarchive.csv'
    link2 = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/bxmcurrent.csv'
    bxm1 = read_csv(link1, index_col=0, parse_dates=True, skiprows=5,
                    header=None).rename_axis('DATE')
    bxm2 = read_csv(link2, index_col=0, parse_dates=True, skiprows=4,
                    header=None).rename_axis('DATE')
    bxm = (pd.concat((bxm1, bxm2)).rename(columns={
        1: 'BXM'
    }).iloc[:, 0].asfreq(
        'D', method='ffill').fillna(method='ffill').asfreq('M').pct_change()
        * 100.)
    tgt.append(bxm)

    link = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/rxm_historical.csv'
    rxm = (read_csv(
        link, index_col=0, parse_dates=True, skiprows=2,
        header=None).rename(columns={
            1: 'RXM'
        }).rename_axis('DATE').iloc[:, 0].asfreq('D', method='ffill').fillna(
            method='ffill').asfreq('M').pct_change() * 100.)
    tgt.append(rxm)

    # Clean up data retrieved above
    # ------------------------------------------------------------------------
    factors = pd.concat(tgt, axis=1).round(2)
    # Map source column names to this module's factor symbols.
    # NOTE(review): the 'Mom ' key's exact trailing whitespace must
    # match the French momentum column label -- confirm against the
    # downloaded data.
    newnames = {
        'Mkt-RF': 'MKT',
        'Mom ': 'UMD',
        'ST_Rev': 'STR',
        'LT_Rev': 'LTR',
        'RESVAR': 'IVAR',
        'AC': 'ACC',
        'PTFSBD': 'BDLB',
        'PTFSFX': 'FXLB',
        'PTFSCOM': 'CMLB',
        'PTFSIR': 'IRLB',
        'PTFSSTK': 'STLB',
        'DTWEXB': 'USD',
        'BAMLH0A0HYM2': 'HY'
    }
    factors.rename(columns=newnames, inplace=True)

    # Get last valid RF date; returns will be constrained to this date
    factors = factors[:factors['RF'].last_valid_index()]

    # Subtract RF for long-only factors so all columns are excess
    # returns.
    subtract = ['HY', 'PUT', 'BXM', 'RXM']
    for i in subtract:
        factors.loc[:, i] = factors[i] - factors['RF']
    return factors