Example #1
rebalend = 20210101  # a Friday, so can include last week in 2020
wd = Weekly(sql, 'Fri', rebalbeg, rebalend)  # Generate Friday-end weekly cal
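# Hedged illustration (not part of the original example): Weekly is assumed to
# be the project's own trading calendar class; plain pandas can preview the
# analogous Friday week-ending dates as yyyymmdd integers (dates are arbitrary).
import pandas as pd
fridays = pd.date_range('2019-01-01', '2021-01-01', freq='W-FRI')
print([int(d.strftime('%Y%m%d')) for d in fridays[:3]], '...',
      int(fridays[-1].strftime('%Y%m%d')))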

# Retrieve weekly returns, standardize scores, and compute returns and i.c.
june_universe = 0  # track when a June month-end is reached, to update universe
year = 0  # track each new year, to retrieve prices in batch for screening
res = DataFrame()
tic = time.time()
for rebaldate in wd.date_range(rebalbeg, rebalend)[:-1]:
    d = bd.june_universe(rebaldate)
    if d != june_universe:
        june_universe = d  # update universe every June
        univ = crsp.get_universe(june_universe)  # usual CRSP universe screen
        univ = univ[univ['decile'] < 10]  # drop smallest decile stocks
    start = wd.begwk(rebaldate)  # starting date of rebalance week
    beg = bd.offset(rebaldate, 1)  # beginning date of holding week
    end = wd.endwk(beg)  # ending date of holding week

    prcdate = bd.offset(start, -1)  # require price available at start of week
    prcyear = (prcdate // 10000) * 10000
    if prcyear != year:  # retrieve new batch of prices each new year
        year = prcyear
        prc = crsp.get_range('daily',
                             'prc',
                             'date',
                             year + 101,
                             year + 1231,
                             use_cache=True)
    X = prc[prc.index.get_level_values('date') == prcdate]\
        .reset_index()\
        .set_index('permno')
    with open(os.path.join(outdir, 'index.html'), 'wt') as f:
        f.write('<h1>Quant Factors Zoo</h1><br>')
        f.write(' <br>')
        f.write('<p>\n')
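# Hedged sketch (not in the original snippet) of the "standardize scores and
# compute i.c." step mentioned above: z-score a signal across stocks, then take
# its rank (Spearman) correlation with the subsequent holding-week returns.
# `score` and `ret` are assumed to be pandas Series indexed by permno.
import pandas as pd
def weekly_ic(score, ret):
    both = pd.concat([score.rename('score'), ret.rename('ret')],
                     axis=1, join='inner').dropna()
    z = (both['score'] - both['score'].mean()) / both['score'].std()
    return z.corr(both['ret'], method='spearman')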

# Momentum and divyld from CRSP monthly
if 'monthly' in testable:
    if regenerate:
        beg, end = 19251231, LAST_DATE
        intervals = {'mom12m': (2,12), 'mom36m': (13,36),
                     'mom6m': (2,6), 'mom1m': (1,1)}
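        # Window convention inferred from the date arithmetic below (not stated
        # in the original): past=(a, b) means the signal is the cumulative
        # return from the month-end b months before the rebalance date to the
        # month-end (a-1) months before it, so mom12m skips the most recent
        # month and mom1m is just the latest month's return.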
        for label, past in intervals.items():
            out = DataFrame()
            for rebaldate in bd.date_range(bd.endmo(beg, past[1]), end, 'endmo'):
                start = bd.endmo(rebaldate, -past[1])
                beg1 = bd.offset(start, 1)
                end1 = bd.endmo(rebaldate, 1-past[0])
                df = crsp.get_universe(end1)
                df['start'] = crsp.get_section(dataset='monthly', fields=['ret'],
                                               date_field='date', date=start)\
                                  .reindex(df.index)
                df[label] = crsp.get_ret(beg1, end1).reindex(df.index)
                df['permno'] = df.index
                df['rebaldate'] = rebaldate
                df = df.dropna(subset=['start'])
                out = pd.concat([out, df[['rebaldate', 'permno', label]]],
                                ignore_index=True)    # append rows
            n = signals.write(out, label, overwrite=True)

        beg, end = 19270101, LAST_DATE
        columns = ['chmom', 'divyld', 'indmom']
print(DataFrame(**sql.run('select * from ' + bench['ident'].key)))
"""
"""Weekly: price update and clear affected redis store
run yahoo
./redis-cli --scan --pattern '*CRSP_2021*' | xargs ./redis-cli del
"""

# Estimate daily factors
LAST_DATE = 20210618  # last date in daily prices table
bd = BusDay(sql)
bench = Benchmarks(sql, bd)
crsp = CRSP(sql, bd, rdb)
pstat = PSTAT(sql, bd)

## Rebalance and return dates, and initialize classes for calculations
rebalbeg = bd.offset(20190630)
rebals = [bd.offset(d) for d in [20200630]]
stocks = chunk_stocks(crsp, rebalbeg, LAST_DATE)
perf = DailyPerformance(stocks)

# Compute HML factor
label = 'hml'
lag = 6  # number of months to lag fundamental data
df = pstat.get_linked(  # retrieve required fields from compustat
    dataset='annual',
    date_field='datadate',
    fields=['seq', 'pstk', 'pstkrv', 'pstkl', 'txditc'],
    where=(f"indfmt = 'INDL' AND datafmt = 'STD' AND curcd = 'USD' "
           f"  AND popsrc = 'D' AND consol = 'C' "
           f"  AND seq > 0 AND datadate >= 20141201"))
        new = set(np.unique(df['ticker'])).difference(
            set(np.unique(prices['ticker'])))
        df = df[df['ticker'].isin(new)]
        prices = pd.concat([prices, df], sort=False)
        print(pathname, 'added prices', new)

        df = pd.read_csv(os.path.join(pathname, 'dividends.csv.gz'), sep='|')
        new = set(np.unique(df['ticker'])).difference(
            set(np.unique(dividends['ticker'])))
        df = df[df['ticker'].isin(new)]
        dividends = pd.concat([dividends, df], sort=False)
        print(pathname, 'added dividends', new)

    sql = SQL(**config.credentials['sql'], echo=config.ECHO)
    bd = BusDay(sql)
    crsp = CRSP(sql, bd, rdb=None)
    date = bd.offset(crsp_date)

    # get price and shrout as of last date
    price = crsp.get_section('daily', ['prc', 'shrout'],
                             'date',
                             date,
                             start=None)

    # get tickers to lookup permno
    tickers = crsp.get_section('names', ['tsymbol', 'date'],
                               'date',
                               date,
                               start=0).reindex(price.index)
    tickers = tickers.sort_values(['tsymbol', 'date'])\
                     .drop_duplicates(keep='last')
    tickers = tickers.reset_index().set_index('tsymbol')['permno']
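    # Hedged usage illustration (the ticker symbol is an arbitrary example):
    # map a trading symbol to its permno, then look up its latest price and
    # shares outstanding from the section retrieved above.
    permno = tickers.get('AAPL')
    if permno is not None:
        print(price.loc[permno, ['prc', 'shrout']])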
Example #5
        result['mdasent'] = sentiment[year]
        result['currlen'] = len(mdas[year])
        if year - 1 in mdas:
            result['prevlen'] = len(mdas[year - 1])
            result['mdachg'] = sentiment[year] - sentiment[year - 1]

            corpus = [" ".join(mdas[year]), " ".join(mdas[year - 1])]
            cos = cosine_similarity(tf_vectorizer.fit_transform(corpus))
            result['mdacos'] = cos[0, 1]
        if ECHO:
            print(i, int(time.time() - tic), result)
        results.append(result)
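# Hedged sketch (assuming tf_vectorizer above is an sklearn term-frequency
# vectorizer): the year-over-year MD&A similarity is the off-diagonal entry of
# the pairwise cosine-similarity matrix of the two filings' term-count vectors.
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
docs = ["net sales increased due to higher volumes",
        "net sales decreased due to lower volumes"]
tf = CountVectorizer().fit_transform(docs)
print(cosine_similarity(tf)[0, 1])   # similarity between the two documents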

# save in signals database
data = DataFrame.from_records(results)
data['rebaldate'] = bd.offset(data['date'])
print(signals.write(data, 'mdasent', overwrite=True),
      signals.write(data, 'mdachg', overwrite=True),
      signals.write(data, 'mdacos', overwrite=True))

# right join data with univ, to identify universe stocks with missing MD&A
data = pd.concat([data[data['year']==year]\
                  .drop(columns=['year'])\
                  .set_index('permno')\
                  .join(univ[['year']], how='right')\
                  .reset_index()
                  for year, univ in univs.items() if year <= 2020],
                 ignore_index=True)

# save sentiment dataframe in scratch folder
from settings import pickle_load, pickle_dump
Example #6
        label,
        corr=np.corrcoef(backtest.excess, rowvar=False)[0, 1],
        num=2,
        logdir=logdir)

## Construct Mom

# Load monthly universe and stock returns from CRSP.
# Signal is each stock's total return from 12 months ago, skipping the most recent month
# Construct 2-way portfolio sorts, and backtest returns
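# For example (a hedged reading of the date arithmetic below): at rebaldate
# 20201231 with past=(2, 12), beg = endmo(-12) = 20191231 and
# end = endmo(1-2) = 20201130, so the signal is the cumulative return over
# January through November 2020, skipping the most recent month (December).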
label, benchname, past, leverage = 'mom', 'Mom(mo)', (2, 12), 1
rebalbeg, rebalend = 19260101, LAST_DATE
df = []  # collect each month's momentum signal values
for rebaldate in bd.date_range(rebalbeg, rebalend, 'endmo'):
    beg = bd.endmo(rebaldate, -past[1])  # require price at this date
    start = bd.offset(beg, 1)  # start date, inclusive, of signal
    end = bd.endmo(rebaldate, 1 - past[0])  # end date of signal
    p = [
        crsp.get_universe(rebaldate),  # retrieve prices and construct signal
        crsp.get_ret(start, end)['ret'].rename(label),
        crsp.get_section('monthly', ['prc'], 'date', beg)['prc'].rename('beg'),
        crsp.get_section('monthly', ['prc'], 'date', end)['prc'].rename('end')
    ]
    q = pd.concat(p, axis=1, join='inner').reset_index().dropna()
    q['rebaldate'] = rebaldate
    df.append(q[['permno', 'rebaldate', label]])
    print(rebaldate, len(df), len(q))
df = pd.concat(df)
signals.write(df, label, overwrite=True)
portfolios = famafrench_sorts(crsp,
                              label,