Example #1
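These snippets are excerpts from a larger module and omit their module-level setup. A rough sketch of the assumed imports and constants follows; the exact names (notably `DSTART` and the `returns` helper module) are assumptions, not part of the original source.

# Assumed module-level setup for all four snippets (hypothetical):
import itertools

import numpy as np
import pandas as pd
from pandas import read_csv, read_excel
from pandas.tseries import offsets
from pandas_datareader.data import DataReader as dr

DSTART = '1950-01-01'   # assumed default start date for all downloads
# `returns` is assumed to be a companion module providing `prep()` and
# related return-series utilities used in Example #2.
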
def load_industries(pickle_from=None, pickle_to=None):
    """Load industry portfolio returns from Ken French's website.

    Parameters
    ==========
    pickle_from : str, optional
        Path to a pickled copy of the data to load instead of re-downloading
    pickle_to : str, optional
        Path at which to pickle the downloaded data for later reuse

    Returns
    =======
    industries : dict of DataFrames
        keyed by portfolio count (5, 10, 12, 17, 30, 38, 48); each value is a
        DataFrame of returns for that industry grouping

    See also
    ========
    # from pandas_datareader.famafrench import get_available_datasets
    # get_available_datasets()
    """

    full = [5, 10, 12, 17, 30, 38, 48]
    rets = []
    for port in [str(port) + '_Industry_Portfolios' for port in full]:
        ret = dr(port, 'famafrench', start=DSTART)[0]
        rets.append(ret.to_timestamp(how='end'))
    industries = dict(zip(full, rets))

    return industries
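
A minimal usage sketch for the function above (assuming the setup sketched under Example #1 and a network connection; each key is a portfolio count):

industries = load_industries()
ten = industries[10]               # DataFrame of 10-industry portfolio returns
print(ten.columns.tolist())        # ['NoDur', 'Durbl', 'Manuf', ...]
print(ten.tail())
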
Example #2
def load_rf(
    freq='M',
    pickle_from=None,
    pickle_to=None,
):
    """Build a risk-free rate return series using 3-month US T-bill yields.

    The 3-Month Treasury Bill: Secondary Market Rate from the Federal Reserve
    (a yield) is converted to a total return.  See 'Methodology' for details.

    The time series should closely mimic returns of the BofA Merrill Lynch US
    Treasury Bill (3M) (Local Total Return) index.

    Parameters
    ==========
    freq : str, default 'M'
        Frequency of the resulting return series; one of {'D', 'W', 'M', 'Q',
        'A'}, case-insensitive
    pickle_from : str, optional
        Path to a pickled copy of the data to load instead of re-downloading
    pickle_to : str, optional
        Path at which to pickle the resulting series for later reuse

    Methodology
    ===========
    The Federal Reserve publishes a daily chart of Selected Interest Rates
    (release H.15; www.federalreserve.gov/releases/h15/).  As with a yield
    curve, some yields are interpolated from recent issues because Treasury
    auctions do not occur daily.

    While the de-annualized ex-ante yield itself is a fairly good tracker of
    the day's total return, it is not perfect and can exhibit non-negligible
    error in periods of volatile short rates.  The purpose of this function
    is to convert yields to total returns for 3-month T-bills.  It is a
    straightforward process given that these are discount (zero-coupon)
    securities.  It consists of buying a 3-month bond at the beginning of each
    month, then amortizing that bond throughout the month to back into the
    price of a <3-month tenor bond.

    The source data (pulled from fred.stlouisfed.org) is quoted on a discount
    basis.  (See footnote 4 from release H.15.)  This is converted to a
    bond-equivalent yield (BEY) and then translated to a hypothetical daily
    total return.

    The process largely follows Morningstar's published Return Calculation of
    U.S. Treasury Constant Maturity Indices, and is as follows:
    - At the beginning of each month a bill is purchased at the prior month-end
      price, and daily returns in the month reflect the change in daily
      valuation of this bill
    - If t is not a business day, its yield is the yield of the prior
      business day.
    - At each day during the month, the price of a 3-month bill purchased on
      the final calendar day of the previous month is computed.
    - Month-end pricing is unique.  At each month-end date, there are
      effectively two bonds and two prices.  The first is the bond
      hypothetically purchased on the final day of the prior month with 2m
      remaining to maturity, and the second is a new-issue bond purchased that
      day with 3m to maturity.  The former is used as the numerator to compute
      that day's total return, while the latter is used as the denominator
      to compute the next day's (1st day of next month) total return.

    Description of the BofA Merrill Lynch US 3-Month Treasury Bill Index:
    The BofA Merrill Lynch US 3-Month Treasury Bill Index is comprised of a
    single issue purchased at the beginning of the month and held for a full
    month. At the end of the month that issue is sold and rolled into a newly
    selected issue. The issue selected at each month-end rebalancing is the
    outstanding Treasury Bill that matures closest to, but not beyond, three
    months from the rebalancing date. To qualify for selection, an issue must
    have settled on or before the month-end rebalancing date.
        (Source: Bank of America Merrill Lynch)

    See also
    ========
    FRED: 3-Month Treasury Bill: Secondary Market Rate (DTB3)
      https://fred.stlouisfed.org/series/DTB3
    McGraw-Hill/Irwin, Interest Rates, 2008.
      https://people.ucsc.edu/~lbaum/econ80h/LS-Chap009.pdf
    Morningstar, Return Calculation of U.S. Treasury Constant Maturity Indices,
      September 2008.
    """

    # Validate `freq` param (only a single frequency character is supported)
    freqs = list('DWMQA')
    freq = freq.upper()
    if freq not in freqs:
        raise ValueError('`freq` must be one of %s, case-insensitive'
                         % freqs)

    # Load daily 3-Month Treasury Bill: Secondary Market Rate
    # Note that this is on discount basis and will be converted to BEY
    # Periodicity is daily
    rates = dr('DTB3', 'fred', DSTART) * 0.01
    rates = (rates.asfreq('D',
                          method='ffill').fillna(method='ffill').squeeze())

    # Algebra doesn't 'work' on DateOffsets, don't simplify here!
    trigger = rates.index.is_month_end
    dtm_old = rates.index + offsets.MonthEnd(-1) + offsets.MonthEnd(3) \
            - rates.index
    dtm_new = rates.index.where(trigger, rates.index +
                                offsets.MonthEnd(-1)) \
            + offsets.MonthEnd(3) - rates.index

    # This does 2 things in one step:
    # (1) convert discount yield to BEY
    # (2) get the price at that BEY and days to maturity
    # The two equations are simplified
    # See https://people.ucsc.edu/~lbaum/econ80h/LS-Chap009.pdf
    p_old = (100 / 360) * (360 - rates * dtm_old.days)
    p_new = (100 / 360) * (360 - rates * dtm_new.days)

    res = p_old.pct_change().where(trigger, p_new.pct_change())
    res = returns.prep(res, in_format='dec', name='RF', freq='D')

    if freq != 'D':
        # Roll daily returns up to the requested frequency.  (Assumes the
        # prepped series exposes a `rollup` method from the `returns` helper.)
        res = returns.prep(res.rollup(out_freq=freq),
                           in_format='dec',
                           freq=freq)

    return res
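
The pricing step inside `load_rf` can be illustrated in isolation. A minimal sketch with a hypothetical constant 2% discount rate (the function itself pulls the daily DTB3 series from FRED):

import pandas as pd

d = 0.02                                # hypothetical 2% discount yield
dtm = pd.Series([91, 90, 89, 88])       # days to maturity, shrinking day by day
price = (100 / 360) * (360 - d * dtm)   # same actual/360 discount pricing as above
daily_ret = price.pct_change()          # hypothetical daily total returns
print(price.round(4).tolist())          # [99.4944, 99.5, 99.5056, 99.5111]
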
Example #3
def load_rates(freq='D', pickle_from=None, pickle_to=None):
    """Load interest rates from https://fred.stlouisfed.org/.

    Parameters
    ==========
    freq : str {'D', 'W', 'M'}, default 'D'
        Frequency of time series; daily, weekly, or monthly
    pickle_from : str, optional
        Path to a pickled copy of the data to load instead of re-downloading
    pickle_to : str, optional
        Path at which to pickle the resulting DataFrame for later reuse

    Original source
    ===============
    Board of Governors of the Federal Reserve System
    H.15 Selected Interest Rates
    https://www.federalreserve.gov/releases/h15/
    """

    months = [1, 3, 6]
    years = [1, 2, 3, 5, 7, 10, 20, 30]

    # Nested dictionaries of symbols from fred.stlouisfed.org
    nom = {
        'D': ['DGS%sMO' % m for m in months] + ['DGS%s' % y for y in years],
        'W': ['WGS%sMO' % m for m in months] + ['WGS%sYR' % y for y in years],
        'M': ['GS%sM' % m for m in months] + ['GS%s' % y for y in years]
    }

    tips = {
        'D': ['DFII%s' % y for y in years[3:7]],
        'W': ['WFII%s' % y for y in years[3:7]],
        'M': ['FII%s' % y for y in years[3:7]]
    }

    fcp = {
        'D': ['DCPF1M', 'DCPF2M', 'DCPF3M'],
        'W': ['WCPF1M', 'WCPF2M', 'WCPF3M'],
        'M': ['CPF1M', 'CPF2M', 'CPF3M']
    }

    nfcp = {
        'D': ['DCPN30', 'DCPN2M', 'DCPN3M'],
        'W': ['WCPN1M', 'WCPN2M', 'WCPN3M'],
        'M': ['CPN1M', 'CPN2M', 'CPN3M']
    }

    short = {
        'D': ['DFF', 'DPRIME', 'DPCREDIT'],
        'W': ['FF', 'WPRIME', 'WPCREDIT'],
        'M': ['FEDFUNDS', 'MPRIME', 'MPCREDIT']
    }

    rates = list(
        itertools.chain.from_iterable(
            [d[freq] for d in [nom, tips, fcp, nfcp, short]]))
    rates = dr(rates, 'fred', start=DSTART)

    l1 = ['Nominal'] * 11 + ['TIPS'] * 4 + ['Fncl CP'] * 3 \
       + ['Non-Fncl CP'] * 3 + ['Short Rates'] * 3

    l2 = ['%sm' % m for m in months] + ['%sy' % y for y in years] \
       + ['%sy' % y for y in years[3:7]] \
       + 2 * ['%sm' % m for m in range(1, 4)] \
       + ['Fed Funds', 'Prime Rate', 'Primary Credit']

    rates.columns = pd.MultiIndex.from_arrays([l1, l2])

    return rates
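
A short usage sketch showing how the two-level column index built above can be sliced (again assuming the setup under Example #1):

rates = load_rates(freq='M')
ten_year = rates[('Nominal', '10y')]    # monthly 10-year constant-maturity yield
nominal_curve = rates['Nominal']        # all nominal tenors, 1m through 30y
print(nominal_curve.tail())
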
Example #4
def load_factors(pickle_from=None, pickle_to=None):
    """Load risk factor returns.

    Factors
    =======
    Symbol      Description                                            Source
    ------      ----------                                             ------
    MKT         Market excess return (Mkt-RF)                          French
    SMB         Size (small minus big)                                 French
    HML         Value (high minus low)                                 French
    RMW         Profitability (robust minus weak)                      French
    CMA         Investment (conservative minus aggressive)             French
    UMD         Momentum (up minus down)                               French
    STR         Short-term reversal                                    French
    LTR         Long-term reversal                                     French
    BETA        Beta                                                   French
    ACC         Accruals                                               French
    VAR         Variance                                               French
    IVAR        Residual variance                                      French
    EP          Earnings-to-price                                      French
    CP          Cash flow-to-price                                     French
    DP          Dividend-to-price                                      French
    BAB         Betting against beta                                   AQR
    QMJ         Quality minus junk                                     AQR
    HMLD        Value (high minus low) [modified version]              AQR
    LIQ         Liquidity                                              Pastor
    BDLB        Bond lookback straddle                                 Hsieh
    FXLB        Currency lookback straddle                             Hsieh
    CMLB        Commodity lookback straddle                            Hsieh
    IRLB        Interest rate lookback straddle                        Hsieh
    STLB        Stock lookback straddle                                Hsieh
    PUT         CBOE S&P 500 PutWrite Index                            CBOE
    BXM         CBOE S&P 500 BuyWrite Index®                           CBOE
    RXM         CBOE S&P 500 Risk Reversal Index                       CBOE

    Source Directory
    ================
    Source      Link
    ------      ----
    French      http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
    Pastor      http://faculty.chicagobooth.edu/lubos.pastor/research/liq_data_1962_2016.txt
    AQR         https://www.aqr.com/library/data-sets
    Hsieh       https://faculty.fuqua.duke.edu/~dah7/HFData.htm
    Fed         https://fred.stlouisfed.org/
    CBOE        http://www.cboe.com/products/strategy-benchmark-indexes
    """

    # TODO: factors eligible for addition
    #   VIIX, VIIZ, XIV, ZIV, CRP (AQR)
    #   http://www.cboe.com/micro/buywrite/monthendpricehistory.xls ends 2016
    #   could use:
    #   http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/putdailyprice.csv

    # Warning: slow, kludgy data retrieval follows
    # ------------------------------------------------------------------------

    # `tgt` collects DataFrames that are concatenated at the end
    tgt = []

    # MKT, SMB, HML, RMW, CMA, RF, UMD, STR, LTR
    facs = [
        'F-F_Research_Data_5_Factors_2x3', 'F-F_Momentum_Factor',
        'F-F_ST_Reversal_Factor', 'F-F_LT_Reversal_Factor'
    ]

    for fac in facs:
        tgt.append(dr(fac, 'famafrench', DSTART)[0])

    # BETA, ACC, VAR, IVAR require some manipulation to compute returns
    # in the dual-sort method of Fama-French
    for i in ['BETA', 'AC', 'VAR', 'RESVAR']:
        ser = dr('25_Portfolios_ME_' + i + '_5x5', 'famafrench', DSTART)[0]
        ser = (ser.iloc[:, [0, 5, 10, 15, 20]].mean(axis=1) -
               ser.iloc[:, [4, 9, 14, 19, 24]].mean(axis=1))
        ser = ser.rename(i)
        tgt.append(ser)

    # E/P, CF/P, D/P (univariate sorts, quintile spreads)
    for i in ['E-P', 'CF-P', 'D-P']:
        ser = dr('Portfolios_Formed_on_' + i, 'famafrench', DSTART)[0]
        ser = ser.loc[:, 'Hi 20'] - ser.loc[:, 'Lo 20']
        ser = ser.rename(i)
        tgt.append(ser)

    tgt = [df.to_timestamp(how='end') for df in tgt]

    # BAB, QMJ, HMLD
    # TODO: performance is poor here, runtime is eaten up by these 3
    links = {
        'BAB': 'http://bit.ly/2hWyaG8',
        'QMJ': 'http://bit.ly/2hUBSgF',
        'HMLD': 'http://bit.ly/2hdVb7G'
    }
    for key, value in links.items():
        ser = read_excel(value, header=18, index_col=0)['USA'] * 100
        ser = ser.rename(key)
        tgt.append(ser)

    # Lookback straddles
    link = 'http://faculty.fuqua.duke.edu/~dah7/DataLibrary/TF-Fac.xls'
    straddles = read_excel(link, header=14, index_col=0)
    straddles.index = (pd.DatetimeIndex(straddles.index.astype(str) + '01') +
                       offsets.MonthEnd(1))
    straddles = straddles * 100.
    tgt.append(straddles)

    # LIQ
    link = 'http://bit.ly/2pn2oBK'
    liq = read_csv(link,
                   skiprows=14,
                   delim_whitespace=True,
                   header=None,
                   usecols=[0, 3],
                   index_col=0,
                   names=['date', 'LIQ'])
    liq.index = (pd.DatetimeIndex(liq.index.astype(str) + '01') +
                 offsets.MonthEnd(1))
    liq = liq.replace(-99, np.nan) * 100.
    tgt.append(liq)

    # USD, HY
    fred = dr(['DTWEXB', 'BAMLH0A0HYM2'], 'fred', DSTART)  # daily default
    fred = (fred.asfreq('D',
                        method='ffill').fillna(method='ffill').asfreq('M'))
    fred.loc[:, 'DTWEXB'] = fred['DTWEXB'].pct_change() * 100.
    fred.loc[:, 'BAMLH0A0HYM2'] = fred['BAMLH0A0HYM2'].diff()
    tgt.append(fred)

    # PUT, BXM, RXM (CBOE options strategy indices)
    link1 = 'http://www.cboe.com/micro/put/put_86-06.xls'
    link2 = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/putdailyprice.csv'

    put1 = (read_excel(link1, index_col=0, skiprows=6,
                       header=None).rename_axis('DATE'))
    put2 = read_csv(link2,
                    index_col=0,
                    parse_dates=True,
                    skiprows=7,
                    header=None).rename_axis('DATE')
    put = (pd.concat((put1, put2)).rename(columns={
        1: 'PUT'
    }).iloc[:, 0].asfreq(
        'D', method='ffill').fillna(method='ffill').asfreq('M').pct_change() *
           100.)
    tgt.append(put)

    link1 = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/bxmarchive.csv'
    link2 = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/bxmcurrent.csv'

    bxm1 = read_csv(link1,
                    index_col=0,
                    parse_dates=True,
                    skiprows=5,
                    header=None).rename_axis('DATE')
    bxm2 = read_csv(link2,
                    index_col=0,
                    parse_dates=True,
                    skiprows=4,
                    header=None).rename_axis('DATE')
    bxm = (pd.concat((bxm1, bxm2)).rename(columns={
        1: 'BXM'
    }).iloc[:, 0].asfreq(
        'D', method='ffill').fillna(method='ffill').asfreq('M').pct_change() *
           100.)
    tgt.append(bxm)

    link = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/rxm_historical.csv'
    rxm = (read_csv(
        link, index_col=0, parse_dates=True, skiprows=2,
        header=None).rename(columns={
            1: 'RXM'
        }).rename_axis('DATE').iloc[:, 0].asfreq('D', method='ffill').fillna(
            method='ffill').asfreq('M').pct_change() * 100.)
    tgt.append(rxm)

    # Clean up data retrieved above
    # ------------------------------------------------------------------------

    factors = pd.concat(tgt, axis=1).round(2)
    newnames = {
        'Mkt-RF': 'MKT',
        'Mom   ': 'UMD',
        'ST_Rev': 'STR',
        'LT_Rev': 'LTR',
        'RESVAR': 'IVAR',
        'AC': 'ACC',
        'PTFSBD': 'BDLB',
        'PTFSFX': 'FXLB',
        'PTFSCOM': 'CMLB',
        'PTFSIR': 'IRLB',
        'PTFSSTK': 'STLB',
        'DTWEXB': 'USD',
        'BAMLH0A0HYM2': 'HY'
    }
    factors.rename(columns=newnames, inplace=True)

    # Get last valid RF date; returns will be constrained to this date
    factors = factors[:factors['RF'].last_valid_index()]

    # Subtract RF for long-only factors
    subtract = ['HY', 'PUT', 'BXM', 'RXM']

    for i in subtract:
        factors.loc[:, i] = factors[i] - factors['RF']

    return factors
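
Finally, a brief usage sketch for the factor set (slow to build, per the warning in the source):

factors = load_factors()
ff3 = factors[['MKT', 'SMB', 'HML']]    # classic Fama-French three factors
print(ff3.corr().round(2))              # factor correlation matrix
print(factors['BAB'].dropna().tail())   # AQR betting-against-beta factor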