Пример #1
0
def _refresh_old_pca2(x):
    """Recompute and save rolling PCA results over the historical date range.

    ``x`` is a 3-tuple ``(_d_comb, _d_ocor, _d_opca)``:

    * ``_d_comb`` - store queried with ``retrieve('Returns')``.
    * ``_d_ocor`` - store whose ``listdir()`` supplies entry names; only the
      first name is used, as the date at which processing starts.
    * ``_d_opca`` - store that receives one ``save`` call per processed row.

    For each processed row, the 199 rows (``window - 1``) immediately before
    it are taken, ticker columns with any NaN in that span are dropped, and
    ``crtp.compute_fast_pca`` is run on the transposed block.  The row's date
    string, the surviving ticker names and the two PCA outputs are saved.

    Always returns ``None``.
    """
    _d_comb, _d_ocor, _d_opca = x
    stored_names = _d_ocor.listdir()

    window = 200
    returns = _d_comb.retrieve('Returns')

    date_strs = crup.pd_dt_to_str(returns['Date', list])
    # Begin `window` rows ahead of the anchor date so that the first processed
    # row already has a full look-back span behind it.
    # NOTE(review): stored_names[0] is presumably the earliest stored date -
    # confirm that listdir() returns names in sorted order.
    anchor = stored_names[0]
    start_row = [pos - window for pos, dt in enumerate(date_strs) if dt == anchor][0]
    returns = returns[start_row:, :]

    n_rows = returns.shape[0]
    tick_cols = returns.tick_cols()

    for stop in range(window + 1, n_rows + 1):
        # Rows stop-window .. stop-2; row stop-1 is the date being labelled.
        span = returns[stop - window:stop - 1, tick_cols].values
        full_cols = np.where(np.all(~np.isnan(span), axis=0).tolist())[0]
        pca_wgt, pca_ret = crtp.compute_fast_pca(span[:, full_cols].T)

        # Names of the tickers that survived the NaN filter, in column order.
        wanted = set(full_cols.tolist())
        ticks = [name for pos, name in enumerate(tick_cols) if pos in wanted]

        date_key = crup.pd_dt_to_str([returns[stop - 1, 'Date', list]])[0]
        _d_opca.save(date_key, ticks, pca_wgt, pca_ret)
    return None
Пример #2
0
def _refresh_old_correl(x):
    """Save a trailing sample-correlation matrix of returns for each date.

    ``x`` is a 2-tuple ``(_d_comb, _d_ocor)``: the store the ``'Returns'``
    frame is retrieved from and the store each matrix is saved to.  The first
    date processed is the module-level ``cor_st_dt`` (no longer taken from a
    separate correlation store).

    For each processed row ``i`` the 199 rows before it (``window - 1``) are
    used.  Tickers with any NaN in that span are left out of the
    ``np.corrcoef`` call; their rows and columns in the saved matrix stay NaN,
    except for the diagonal, which is always set to 1.  The matrix is saved
    under the row's date string.

    Always returns ``None``.
    """
    _d_comb, _d_ocor = x
    window = 200
    returns = _d_comb.retrieve('Returns')
    tick_cols = returns.tick_cols()

    all_dates = crup.pd_dt_to_str(returns['Date', list])
    first_row = [pos for pos, dt in enumerate(all_dates) if dt == cor_st_dt][0]

    # Keep `window` rows of history ahead of the start date.
    returns = returns[first_row - window:, :]
    dates = crup.pd_dt_to_str(returns['Date', list])

    for i, date_key in enumerate(dates):
        if i < window:
            # Not enough history behind this row yet.
            continue

        span = returns[i - window + 1:i, :][:, tick_cols].values
        n_obs = span.shape[0]
        n_ticks = span.shape[1]

        # Columns observed on every row of the span.
        full_cols = np.where(np.sum(~np.isnan(span), axis=0) == n_obs)[0]
        sub_corr = np.corrcoef(span[:, full_cols].T)

        # Scatter the dense sub-matrix back into a full-size NaN matrix.
        corr = np.zeros([n_ticks, n_ticks]) * np.nan
        for src, dst in enumerate(full_cols):
            corr[dst, full_cols] = sub_corr[src, :]
        np.fill_diagonal(corr, 1)

        _d_ocor.save(date_key, corr)
    return None
Пример #3
0
def _refresh_old_correl(x):
    """Write one trailing correlation matrix per date, starting at ``cor_st_dt``.

    ``x`` unpacks to ``(_d_comb, _d_ocor)``.  ``'Returns'`` is retrieved from
    ``_d_comb``; every matrix is written with ``_d_ocor.save(date, matrix)``.

    Each matrix is built from the 199 rows (``window - 1``) preceding the
    labelled row.  Only tickers without NaN in that span enter
    ``np.corrcoef``; all other entries stay NaN, and the diagonal is forced
    to 1.

    An earlier experiment that also derived a ``'PCA0'`` return series from
    the correlation matrix is not part of this function.

    Always returns ``None``.
    """
    _d_comb, _d_ocor = x
    window = 200
    comb_ret = _d_comb.retrieve('Returns')
    tick_cols = comb_ret.tick_cols()

    def _padded_corr(values):
        # Correlate the fully observed columns and place the result in a
        # NaN-filled matrix spanning every ticker column.
        n_obs = values.shape[0]
        n_ticks = values.shape[1]
        usable = np.where(np.sum(~np.isnan(values), axis=0) == n_obs)[0]
        dense = np.corrcoef(values[:, usable].T)
        padded = np.zeros([n_ticks, n_ticks])*np.nan
        for src, dst in enumerate(usable):
            padded[dst, usable] = dense[src, :]
        np.fill_diagonal(padded, 1)
        return padded

    date_strs = crup.pd_dt_to_str(comb_ret['Date', list])
    start = [pos for pos, dt in enumerate(date_strs) if dt == cor_st_dt][0]
    start -= window  # leave `window` rows of history before the start date

    comb_ret = comb_ret[start:, :]
    date_strs = crup.pd_dt_to_str(comb_ret['Date', list])

    for i, j in enumerate(date_strs):
        if i >= window:
            trailing = comb_ret[i-window+1:i, :]
            _d_ocor.save(j, _padded_corr(trailing[:, tick_cols].values))
    return None
Пример #4
0
def _refresh_old_exposure_matrix2(x):
    """Build and store a five-row beta frame for every stored PCA entry.

    ``x`` is a 3-tuple ``(_d_opca_cl, _d_opca_wgt, _d_opca_bt)``:

    * ``_d_opca_cl``  - ``listdir()`` gives the entry names; ``load(name)``
      gives a mapping whose ``'arr_3'`` item is read.
    * ``_d_opca_wgt`` - source of the frames ``'PCA01_Beta'`` to
      ``'PCA20_Beta'``.
    * ``_d_opca_bt``  - destination; ``store(name, frame)`` is called for each
      entry that passes the NaN check below.

    For each entry, the first five values of ``'arr_3'`` are read as integer
    indices (index ``k`` selects frame ``PCA<k+1>``).
    NOTE(review): these are presumably the selected component indices,
    0-based - confirm against the code that writes ``'arr_3'``.
    The rows of each selected frame whose date equals the entry name are
    row-bound together, the ``'Date'`` column is deleted and the result is
    stored.  Entries whose first selected frame has only NaN in its ticker
    columns are skipped.

    Always returns ``None``.
    """
    _d_opca_cl, _d_opca_wgt, _d_opca_bt = x
    f_files = _d_opca_cl.listdir()
    # The converted dates are not used below.
    # NOTE(review): call kept in case it validates the names - confirm and drop.
    crup.str_to_pd_dt(f_files)

    # Frames keyed 'PCA_01' .. 'PCA_20'.
    betas = {}
    for number in range(1, 21):
        betas['PCA_%02d' % number] = _d_opca_wgt.retrieve('PCA%02d_Beta' % number)

    reference = betas['PCA_01']
    p_dt = crup.pd_dt_to_str(reference['Date', list])
    tick_cols = reference.tick_cols()

    def _rows(component, row_idx):
        # `component` is a zero-based index; frame labels are one-based and
        # zero-padded to two digits.
        number = component + 1
        label = ('PCA_0' if number <= 9 else 'PCA_') + str(number)
        return betas[label][row_idx, :]

    for name in f_files:
        row_idx = [pos for pos, dt in enumerate(p_dt) if dt == name]

        # retain the first 5
        ranking = np.array(_d_opca_cl.load(name)['arr_3'][0:5], dtype=np.int64)

        beta = _rows(ranking[0], row_idx)
        if not any(~np.isnan(beta[0, tick_cols].values.astype(np.float64))):
            # First selected frame has no data for this date: store nothing.
            continue

        for k in range(1, 5):
            beta.row_bind(_rows(ranking[k], row_idx))
        del beta['Date']
        _d_opca_bt.store(name, beta)
    return None
Пример #5
0
def _refresh_lev_stats_new(x):
    """Store leverage-adjusted copies of the combined and signal frames.

    ``x`` is a 5-tuple ``(_d_comb, _d_sig, _d_bt_pp, _d_cor, _d_flag)``.
    ``_d_flag`` is a string spliced into the leverage key
    ``'Lev_CC_' + _d_flag + 'D'``.  Everything is written to ``_d_bt_pp``.

    Rows are restricted to dates that are both listed by ``_d_cor.listdir()``
    and not earlier than the module-level ``_dt_st``.  On those rows:

    * ``'Lev'``  - stored as retrieved.
    * ``'Univ'`` - cast to int and multiplied by the flag ``leverage < 5``.
    * ``'Adv'``  - divided by leverage.
    * every key in ``_d_comb_data`` (from ``_d_comb``) and in
      ``_d_sig_data1`` / ``_d_sig_data2`` (from ``_d_sig``) - multiplied by
      leverage.
    * ``'Returns'`` - multiplied, per ticker, by ``crtf.lag(leverage, 2)``.

    Always returns ``None``.
    """
    _d_comb, _d_sig, _d_bt_pp, _d_cor, _d_flag = x

    cor_dates = _d_cor.listdir()

    flev = _d_comb.retrieve('Lev_CC_' + _d_flag + 'D')
    lev_dates = crup.pd_dt_to_str(flev['Date', list])
    flev_idx = [pos for pos, dt in enumerate(lev_dates)
                if (dt in cor_dates) and (dt >= _dt_st)]  # change to PCA Beta
    flev = flev[flev_idx, :]
    _d_bt_pp.store('Lev', flev)
    flev_mult = flev[flev.tick_cols()].values.astype('float64')

    def _tick_values(frame, dtype):
        # Ticker columns of `frame` as an array of the requested dtype.
        return frame[frame.tick_cols()].values.astype(dtype)

    def _store_levered(source, key):
        # Retrieve `key`, keep the selected rows, scale by leverage, store.
        frame = source.retrieve(key)[flev_idx, :]
        frame[:, frame.tick_cols()] = _tick_values(frame, 'float64')*flev_mult
        _d_bt_pp.store(key, frame)

    fun = _d_comb.retrieve('Univ')[flev_idx, :]
    fun[:, fun.tick_cols()] = _tick_values(fun, int) * (flev_mult < 5).astype(int)
    _d_bt_pp.store('Univ', fun)

    fliq = _d_comb.retrieve('Adv')[flev_idx, :]
    fliq[:, fliq.tick_cols()] = _tick_values(fliq, 'float64')/flev_mult
    _d_bt_pp.store('Adv', fliq)

    for key in _d_comb_data:
        _store_levered(_d_comb, key)

    for key in _d_sig_data1:
        _store_levered(_d_sig, key)

    for key in _d_sig_data2:
        _store_levered(_d_sig, key)

    # Returns uses lagged leverage (!)
    fret = _d_comb.retrieve('Returns')[flev_idx, :]
    for tick in fret.tick_cols():
        fret[tick] = fret[tick].values * crtf.lag(flev[tick].values, 2)
    _d_bt_pp.store('Returns', fret)

    return None
Пример #6
0
def _refresh_correl(x):
    """Recursively update a pairwise correlation estimate and save it per date.

    ``x`` is a 2-tuple ``(_d_comb, _d_cor)``.  ``'Returns'`` and
    ``'Vol_CC_240D'`` are retrieved from ``_d_comb``; one float32 matrix per
    qualifying date is written with ``_d_cor.save(date, matrix)``.

    Steps:

    1. z-scores are formed as return / vol per ticker, and only rows with at
       least 10 non-NaN z-scores are kept.
    2. Each z-score is damped with ``z * exp(-z**2/18 + 0.5) / 3``, which
       peaks at ``|z| = 3`` with value 1.
    3. A state ``yf1`` (one value per ticker pair) is initialised on the first
       row from the sign of the outer product and then updated on every later
       row with step size ``lmb1 = 2/(window1 + 1)``.  The estimate is
       ``yr1 = 2/(1 + exp(-yf1)) - 1``, which lies in (-1, 1).  Pairs
       involving a ticker that ``_cor_idx(yf1)`` does not yet report are
       re-initialised from the sign of the current outer product.
    4. ``yct1`` counts, per pair, the rows on which ``yr1`` was not NaN.  Only
       pairs with at least ``window1`` such rows are published.
    5. From the second row on, if anything is publishable and the date is not
       before the module-level ``cor_st_dt``, values are clipped to
       +/-0.99, the diagonal is set to 1 and the matrix is saved.

    A second, shorter-window variant of the state that used to be sketched in
    comments is not computed.

    Returns ``None``.
    """
    _d_comb, _d_cor = x
    window1 = 200
    lmb1 = 2/(window1+1)
    comb_ret = _d_comb.retrieve('Returns')
    comb_vol = _d_comb.retrieve('Vol_CC_240D')    # change it to one version for 250, one for 167
    tick_cols = comb_ret.tick_cols()

    # z-score = return / vol, ticker by ticker
    comb_zs = comb_vol.copy()
    for i in tick_cols:
        comb_zs[i] = comb_ret[i].values/comb_vol[i].values
    comb_zs_val = np.sum(~np.isnan(comb_zs[tick_cols].values.astype('float64')), axis=1)
    comb_zs_val = np.where(comb_zs_val >= 10)[0]   # use a start date
    comb_zs = comb_zs[comb_zs_val, :]

    n = comb_zs.shape[0]
    f_list_c = crup.pd_dt_to_str(comb_zs['Date', list])

    # damped z-scores
    comb_zsm = comb_zs.copy()
    for i in tick_cols:
        comb_zsm[i] = comb_zsm[i]*np.exp(-(comb_zsm[i].values**2)/18+0.5)/3

    for i in range(0, n):
        all_u = comb_zsm[i, tick_cols].values.astype('float64')
        all_uy = np.outer(all_u, all_u)

        if i == 0:
            # Initial state; nothing is saved for the first row.
            yf1 = np.sign(all_uy)
            yr1 = 2/(1+np.exp(-yf1))-1
            yct1 = (~np.isnan(yr1)).astype('uint16')
            continue

        nn = all_u.shape[0]
        # all_ux[j, k] = u_j**2 + u_k**2, built in one vectorised call instead
        # of a Python double loop over every ticker pair on every date.
        all_u_sq = all_u**2
        all_ux = np.add.outer(all_u_sq, all_u_sq)

        # Tickers observed today that the state does not cover yet; this must
        # be evaluated before yf1 is updated in place.
        new_idx = list(set(np.where(~np.isnan(all_u))[0]).difference(set(_cor_idx(yf1))))
        yf1 += lmb1*(2/(1-yr1**2))*(all_uy-yr1-yr1*(all_ux-2)/(1+yr1**2))
        if len(new_idx) > 0:
            yf1[new_idx, :] = np.sign(all_uy[new_idx, :])
            yf1[:, new_idx] = np.sign(all_uy[:, new_idx])
        yr1 = 2/(1+np.exp(-yf1))-1
        yct1 += (~np.isnan(yr1)).astype('uint16')

        # Publish only pairs with at least window1 valid updates.
        yr = np.zeros([nn, nn])*np.nan
        yr[yct1 >= window1] = yr1[yct1 >= window1]

        if np.any(~np.isnan(yr)) and (f_list_c[i] >= cor_st_dt):
            yr[abs(yr) > 0.99] = 0.99*np.sign(yr[abs(yr) > 0.99])
            np.fill_diagonal(yr, 1)
            _d_cor.save(f_list_c[i], yr.astype('float32'))
Пример #7
0
def _generate_tradelist2(x, prm=4, eiflag=False):
    """Turn stored return estimates into neutralised weights and save them.

    Parameters
    ----------
    x : tuple
        ``(_d_bt_sig, _d_tl_id, _d_comb)``.  ``_d_bt_sig`` lists and loads the
        per-date return estimates (``'arr_0'`` = tickers, ``'arr_1'`` =
        estimates); ``_d_tl_id`` receives ``save(date, tickers, weights)``;
        ``_d_comb`` supplies the input frames and receives
        ``'Signal_Returns'``.
    prm : number, default 4
        Shrinkage of the sample correlation towards the identity:
        ``(corr + prm*I) / (1 + prm)``.
    eiflag : bool, default False
        When true, a row of ones is placed ahead of the twelve beta rows in
        the constraint matrix passed to ``crts.neut_corr_mat``.

    Notes
    -----
    * Row ``i`` of the date-filtered frames is paired with ``f_list[i]``.
      NOTE(review): this assumes ``listdir()`` returns names in the same
      order as the frame's dates - confirm.
    * ``covi`` holds the *inverse* of ``D * cori * D`` with
      ``D = 16 * diag(vol)``.
      NOTE(review): 16 is presumably a sqrt(256) annualisation - confirm.
    * Weights are rescaled so that the larger of the long and short sums lies
      in [0.5, 4].  A book with exactly zero exposure is left untouched;
      rescaling it would divide by zero and save NaN weights.
    * For every date except the last two, the weights are multiplied with the
      returns found two signal dates later; the sums are stored as
      ``'Signal_Returns'``.

    Returns ``None``.
    """
    _d_bt_sig, _d_tl_id, _d_comb = x

    ret = _d_comb.retrieve('Returns')
    lev = _d_comb.retrieve('Lev_CC_120D')
    vol = _d_comb.retrieve('Vol_CC_120D')

    # Twelve beta frames: EQIDX, SP, MOM, each at 3, 6, 9 and 12 months.
    beta_names = ['%s_Beta_%dm' % (family, months)
                  for family in ('EQIDX', 'SP', 'MOM')
                  for months in (3, 6, 9, 12)]
    betas = [_d_comb.retrieve(name) for name in beta_names]

    f_list = _d_bt_sig.listdir()
    p_dt = crup.pd_dt_to_str(lev['Date', list])
    p_dt_idx = [i for i, j in enumerate(p_dt) if j in f_list]

    # `ret` stays unfiltered: it is indexed by absolute row through p_dt_idx.
    lev = lev[p_dt_idx, :]
    vol = vol[p_dt_idx, :]
    betas = [frame[p_dt_idx, :] for frame in betas]

    window = 200

    n = len(f_list)
    ret_coll = np.zeros(n-2)*np.nan

    for i, j in enumerate(f_list):
        # read the return estimates
        ret_est = _d_bt_sig.load(j)
        reti = ret_est['arr_1']
        tci = list(ret_est['arr_0'])

        levi = lev[i, tci].values.astype('float64')
        reti_un = (reti/levi)/1000
        voli = vol[i, tci].values.astype('float64')
        beta_rows = [frame[i, tci].values.astype('float64') for frame in betas]

        # trailing returns strictly before the signal date
        ret_mini = ret[p_dt_idx[i]-window+1:p_dt_idx[i], tci].values.astype('float64')
        ret_mini[np.isnan(ret_mini)] = 0  # hygiene

        nn = len(tci)
        cori = (np.corrcoef(ret_mini.T)+prm*np.identity(nn))/(1+prm)
        dvoli = 16*np.diag(voli)
        covi = np.linalg.inv(np.dot(dvoli, np.dot(cori, dvoli)))

        if eiflag:
            beta_rows = [np.ones(nn)] + beta_rows
        mat_a = np.vstack(beta_rows)

        wgt_a = 1.5*np.dot(crts.neut_corr_mat(covi, mat_a), reti_un)
        wgt_a_sm = np.maximum(np.sum(wgt_a[wgt_a > 0]), -np.sum(wgt_a[wgt_a < 0]))
        if wgt_a_sm > 4:
            wgt_a = wgt_a*4/wgt_a_sm
        elif 0 < wgt_a_sm < 0.5:
            # 0 is excluded on purpose: 0/0 would turn the weights into NaN.
            wgt_a = wgt_a*0.5/wgt_a_sm
        _d_tl_id.save(j, tci, wgt_a)

        if i < n-2:
            reti = ret[p_dt_idx[i+2], tci].values.astype('float64')
            ret_coll[i] = np.sum(wgt_a*reti)

    f_list = f_list[:-2]
    f_list_ = crup.str_to_pd_dt(f_list)
    ret_coll = cruf.DataFrame({'Date': f_list_, 'Returns': ret_coll})
    _d_comb.store('Signal_Returns', ret_coll)

    return None
Пример #8
0
def _generate_tradelist(x):
    """Compute neutralised weights per signal date and print diagnostics.

    ``x`` is a 3-tuple ``(_d_bt_sig, _d_tl_id, _d_comb)``.  ``_d_bt_sig``
    lists and loads the per-date return estimates (``'arr_0'`` = tickers,
    ``'arr_1'`` = estimates) and ``_d_comb`` supplies the input frames.
    ``_d_tl_id`` is unpacked but nothing is written to it: this function only
    prints.

    Per date:

    * the sample correlation of the 199 trailing return rows is shrunk
      towards the identity as ``(corr + 2*I) / 3``;
    * ``covi`` is the *inverse* of ``D * cori * D`` with
      ``D = 16 * diag(vol)``
      (NOTE(review): 16 is presumably a sqrt(256) annualisation - confirm);
    * weights come from ``crts.neut_corr_mat`` with a constraint matrix made
      of a row of ones plus the twelve beta rows;
    * gross exposure (sum of absolute weights) is rescaled into [1, 8].  A
      book with exactly zero exposure is left untouched; rescaling it would
      divide by zero and yield NaN weights.

    For every date except the last two, the weights are multiplied with the
    returns found two signal dates later.  Each date prints gross, max and
    min weight (plus that return when available); the final line prints
    ``16 * mean / std`` of the collected returns.

    NOTE(review): row ``i`` of the date-filtered frames is paired with
    ``f_list[i]``, which assumes ``listdir()`` is ordered like the frame's
    dates - confirm.

    Returns ``None``.
    """
    _d_bt_sig, _d_tl_id, _d_comb = x

    ret = _d_comb.retrieve('Returns')
    lev = _d_comb.retrieve('Lev_CC_120D')
    vol = _d_comb.retrieve('Vol_CC_120D')

    # Twelve beta frames: EQIDX, SP, MOM, each at 3, 6, 9 and 12 months.
    beta_names = ['%s_Beta_%dm' % (family, months)
                  for family in ('EQIDX', 'SP', 'MOM')
                  for months in (3, 6, 9, 12)]
    betas = [_d_comb.retrieve(name) for name in beta_names]

    f_list = _d_bt_sig.listdir()
    p_dt = crup.pd_dt_to_str(lev['Date', list])
    p_dt_idx = [i for i, j in enumerate(p_dt) if j in f_list]

    # `ret` stays unfiltered: it is indexed by absolute row through p_dt_idx.
    lev = lev[p_dt_idx, :]
    vol = vol[p_dt_idx, :]
    betas = [frame[p_dt_idx, :] for frame in betas]

    window = 200

    n = len(f_list)
    ret_coll = np.zeros(n-2)*np.nan

    for i, j in enumerate(f_list):
        # read the return estimates
        ret_est = _d_bt_sig.load(j)
        reti = ret_est['arr_1']
        tci = list(ret_est['arr_0'])

        levi = lev[i, tci].values.astype('float64')
        reti_un = (reti/levi)/1000
        voli = vol[i, tci].values.astype('float64')
        beta_rows = [frame[i, tci].values.astype('float64') for frame in betas]

        # trailing returns strictly before the signal date
        ret_mini = ret[p_dt_idx[i]-window+1:p_dt_idx[i], tci].values.astype('float64')
        ret_mini[np.isnan(ret_mini)] = 0  # hygiene

        nn = len(tci)
        ei = np.ones(nn)
        cori = (np.corrcoef(ret_mini.T)+2*np.identity(nn))/3
        dvoli = 16*np.diag(voli)
        covi = np.linalg.inv(np.dot(dvoli, np.dot(cori, dvoli)))

        mat_a = np.vstack([ei] + beta_rows)

        wgt_a = 1.5*np.dot(crts.neut_corr_mat(covi, mat_a), reti_un)
        wgt_a_sm = np.sum(np.abs(wgt_a))
        if wgt_a_sm > 8:
            wgt_a = wgt_a*8/wgt_a_sm
        elif 0 < wgt_a_sm < 1:
            # 0 is excluded on purpose: 0/0 would turn the weights into NaN.
            wgt_a = wgt_a/wgt_a_sm

        if i < n-2:
            reti = ret[p_dt_idx[i+2], tci].values.astype('float64')
            ret_coll[i] = np.sum(wgt_a*reti)
            print(j, np.array([np.sum(np.abs(wgt_a)), np.max(wgt_a), np.min(wgt_a), ret_coll[i]]))
        else:
            print(j, np.array([np.sum(np.abs(wgt_a)), np.max(wgt_a), np.min(wgt_a)]))

    print(16*np.mean(ret_coll)/np.std(ret_coll))
Пример #9
0
def _refresh_base_data_new(x):
    """Assemble and save the per-date base arrays and forward returns.

    ``x`` is a 5-tuple ``(_d_bt_pp, _d_cor, _d_ocor, _d_bt_bs, _d_bt_fr)``:

    * ``_d_bt_pp`` - source of every frame read here (``'Lev'``, ``'Adv'``,
      ``'Univ'``, ``'Returns'``, sixteen ``*_Beta_*`` frames and the
      ``'LrB_<d>D'`` / ``'QrB_<d>D'`` signals for d = 10, 20, ..., 500).
    * ``_d_cor``, ``_d_ocor`` - ``load(date)`` gives a square matrix over the
      ticker columns.
    * ``_d_bt_bs`` - receives
      ``save(date, xi, si1, si2, sci1, sci2, tickers)``.
    * ``_d_bt_fr`` - receives ``save(date, yi)`` for all but the last two
      rows, ``yi`` being the ``'Returns'`` row two rows later.

    Per date (row ``i`` of ``'Lev'``):

    1. Tickers flagged in ``'Univ'`` are selected.
    2. ``xi`` stacks 18 characteristic rows (Lev, Adv, then EQIDX / SP / MOM /
       LVOL betas at 3, 6, 9, 12 months); ``si1`` and ``si2`` stack the 50
       ``LrB`` and 50 ``QrB`` rows.  All are float32.
    3. Tickers with a NaN anywhere in ``xi``, ``si1`` or ``si2`` are dropped.
    4. Tickers with no non-NaN off-diagonal entry in either correlation
       matrix are dropped; remaining NaN entries become 0.5 and the diagonal
       is set to 1.
    5. Two weight vectors ``bi1`` / ``bi2`` are computed as the normalised
       reciprocal of the row sums of the absolute correlations and appended
       to ``xi`` as rows 18 and 19.

    Returns ``None``.
    """
    _d_bt_pp, _d_cor, _d_ocor, _d_bt_bs, _d_bt_fr = x

    # read the characteristics
    flev = _d_bt_pp.retrieve('Lev')
    fadv = _d_bt_pp.retrieve('Adv')
    fun = _d_bt_pp.retrieve('Univ')
    frt = _d_bt_pp.retrieve('Returns')

    # read the betas, in the row order they take inside xi (rows 2-17)
    horizons = ('3m', '6m', '9m', '12m')
    beta_frames = [_d_bt_pp.retrieve(family + '_Beta_' + horizon)
                   for family in ('EQIDX', 'SP', 'MOM', 'LVOL')
                   for horizon in horizons]
    char_frames = [flev, fadv] + beta_frames

    # read the signals: look-backs 10D, 20D, ..., 500D
    lookbacks = range(10, 501, 10)
    lr_frames = [_d_bt_pp.retrieve('LrB_%dD' % d) for d in lookbacks]
    qr_frames = [_d_bt_pp.retrieve('QrB_%dD' % d) for d in lookbacks]

    def _stack_row(frames, row, cols):
        # One float32 row per frame, taken at `row` over the columns `cols`.
        return np.vstack([f[row, cols].values.astype('float32') for f in frames])

    def _has_offdiag_value(mat):
        # True for each index whose row or column of `mat` holds at least one
        # non-NaN entry away from the diagonal.
        present = ~np.isnan(mat)
        np.fill_diagonal(present, False)
        return present.any(axis=1) | present.any(axis=0)

    n = flev.shape[0]
    tc = flev.tick_cols()
    for i in range(0, n):
        has_fwd = i < n-2  # a forward return exists for all but the last two rows
        _dt = crup.pd_dt_to_str([flev[i, 'Date', list]])[0]

        in_univ = fun[i, tc].values.astype('bool')
        funi = [k for k, flag in enumerate(in_univ) if flag]
        tci = [name for name, flag in zip(tc, in_univ) if flag]

        xi = _stack_row(char_frames, i, tci)   # rows 0-17
        si1 = _stack_row(lr_frames, i, tci)
        si2 = _stack_row(qr_frames, i, tci)

        if has_fwd:
            yi = frt[i+2, tci].values.astype('float64')

        ci = _d_cor.load(_dt)
        ci = ci[:, funi][funi, :]

        oci = _d_ocor.load(_dt)
        oci = oci[:, funi][funi, :]

        # remove stocks with missing data
        v_idx1 = np.sum(~np.isnan(xi), axis=0)/xi.shape[0]
        v_idx2 = np.sum(~np.isnan(si1), axis=0)/si1.shape[0]
        v_idx3 = np.sum(~np.isnan(si2), axis=0)/si2.shape[0]
        # np.minimum takes exactly two inputs; a third positional argument is
        # the `out` buffer, so the three-way minimum has to be nested.
        v_min = np.minimum(np.minimum(v_idx1, v_idx2), v_idx3)
        v_idx = [k for k, frac in enumerate(v_min) if frac > 0.999999]
        xi = xi[:, v_idx]
        si1 = si1[:, v_idx]
        si2 = si2[:, v_idx]
        ci = ci[:, v_idx][v_idx, :]
        oci = oci[:, v_idx][v_idx, :]
        tci = [tci[k] for k in v_idx]
        if has_fwd:
            yi = yi[v_idx]

        # remove stocks with no correlation values (hygiene)
        n_ = ci.shape[0]
        keep = _has_offdiag_value(ci) | _has_offdiag_value(oci)
        c_idx = [k for k, flag in enumerate(keep) if flag]

        if len(c_idx) != n_:
            xi = xi[:, c_idx]
            si1 = si1[:, c_idx]
            si2 = si2[:, c_idx]
            ci = ci[:, c_idx][c_idx, :]
            oci = oci[:, c_idx][c_idx, :]
            if has_fwd:
                yi = yi[c_idx]
            tci = [tci[k] for k in c_idx]
            n_ = len(c_idx)

        # hygiene in case something odd is left in the matrices
        ci[np.isnan(ci)] = 0.5
        oci[np.isnan(oci)] = 0.5
        np.fill_diagonal(ci, 1)
        np.fill_diagonal(oci, 1)

        # get the risk budgets
        ei = np.ones(n_, dtype='float64')
        bi1 = 1/np.dot(np.abs(ci), ei)
        bi1 /= np.sum(bi1)

        bi2 = 1/np.dot(np.abs(oci), ei)
        bi2 /= np.sum(bi2)

        # NOTE(review): shrink_correl_fast is given the budget vector, not the
        # correlation matrix - confirm that this is the intended argument.
        sci1 = np.linalg.inv(crts.shrink_correl_fast(bi1)).astype('float32')
        sci2 = np.linalg.inv(crts.shrink_correl_fast(bi2)).astype('float32')
        xi = np.vstack((xi, bi1.astype('float32'), bi2.astype('float32')))  # 18, 19
        _d_bt_bs.save(_dt, xi, si1, si2, sci1, sci2, np.array(tci))
        if has_fwd:
            _d_bt_fr.save(_dt, yi)
    return None
Пример #10
0
def _collect_old_pca_factors(x):
    """Project daily returns onto stored "old PCA" loadings and persist four factor series.

    Parameters
    ----------
    x : tuple
        ``(_d_opca_cl, _d_comb, _d_flag)`` where

        * ``_d_opca_cl`` is a store providing ``listdir()`` and ``load(name)``;
          each loaded object is treated as a 2-D array whose rows are
          components and whose columns line up with the ticker columns of
          the returns frame (presumably one file per date -- confirm).
        * ``_d_comb`` is a store providing ``retrieve(key)`` and
          ``store(key, frame)``.
        * ``_d_flag`` chooses which four component rows are recorded.
          Values 0-6 have dedicated selections; anything else uses the
          selection intended for flag 7.

    Raises
    ------
    NotImplementedError
        If the number of stored files differs from the number of return
        rows dated on or after the first (sorted) file name.

    Returns
    -------
    None
        Results are written to ``_d_comb`` under ``OPCA{0..3}_Returns``
        (per-row factor returns) and ``OPCA{0..3}`` (``cumprod(1 + r)``).
    """
    _d_opca_cl, _d_comb, _d_flag = x
    f_files = sorted(_d_opca_cl.listdir())
    comb_ret = _d_comb.retrieve('Returns')
    # OLD PCA used time-series directly!

    # Restrict the returns to rows dated on/after the first stored file.
    first_name = f_files[0]
    date_strs = crup.pd_dt_to_str(comb_ret['Date', list])
    keep_rows = [pos for pos, stamp in enumerate(date_strs) if stamp >= first_name]
    comb_ret = comb_ret[keep_rows, :]
    if len(f_files) != len(keep_rows):
        raise NotImplementedError('Something is wrong')

    # One output frame per recorded factor (OPCA0 .. OPCA3).
    frames = tuple(_dummy_df(comb_ret) for _ in range(4))

    ret_matrix = comb_ret[:, comb_ret.tick_cols()].values.astype('float64')

    # Component rows to record for each flag, checked in order with ``==``.
    flag_table = (
        (0, (0, 1, 10, 3)),
        (1, (0, 1, 2, 16)),
        (2, (0, 1, 2, 11)),
        (3, (0, 16, 8, 2)),
        (4, (0, 8, 3, 18)),
        (5, (0, 5, 17, 8)),
        (6, (0, 2, 17, 16)),
    )
    fallback_rows = (0, 4, 1, 3)  # _d_flag == 7 (and any unlisted value)
    component_rows = next(
        (rows for flag, rows in flag_table if _d_flag == flag),
        fallback_rows,
    )

    for row, fname in enumerate(f_files):
        loadings = _d_opca_cl.load(fname).astype('float64')

        # Drop columns for which every component loading is NaN.
        live_cols = np.where(np.any(~np.isnan(loadings), axis=0))[0]
        loadings = loadings[:, live_cols]
        day_ret = ret_matrix[row, live_cols]

        # Weighted return of each component, scaled by its gross (L1) weight.
        gross_weight = np.dot(np.abs(loadings), np.ones(loadings.shape[1]))
        factor_ret = np.dot(loadings, day_ret) / gross_weight

        for frame, comp in zip(frames, component_rows):
            frame[row, 'PCA'] = factor_ret[comp]

    # Persist the raw factor returns first ...
    return_keys = ('OPCA0_Returns', 'OPCA1_Returns', 'OPCA2_Returns', 'OPCA3_Returns')
    for key, frame in zip(return_keys, frames):
        _d_comb.store(key, frame)

    # ... then overwrite the column with the compounded series and persist that.
    for frame in frames:
        frame['PCA'] = np.cumprod(1 + frame['PCA'].values)
    level_keys = ('OPCA0', 'OPCA1', 'OPCA2', 'OPCA3')
    for key, frame in zip(level_keys, frames):
        _d_comb.store(key, frame)
    return None
Пример #11
0
def _collect_pca_factors(x):
    """Project volatility-scaled returns onto stored PCA loadings and persist four factor series.

    Parameters
    ----------
    x : tuple
        ``(_d_pca_cl, _d_comb, _d_flag)`` where

        * ``_d_pca_cl`` is a store providing ``listdir()`` and ``load(name)``;
          each loaded object is treated as a 2-D array whose rows are
          components and whose columns line up with the ticker columns of
          the returns frame (presumably one file per date -- confirm).
        * ``_d_comb`` is a store providing ``retrieve(key)`` and
          ``store(key, frame)``.
        * ``_d_flag`` chooses which four component rows are recorded.
          Values 0-6 have dedicated selections; anything else uses the
          selection intended for flag 7.

    Raises
    ------
    NotImplementedError
        If the number of stored files differs from the number of return
        rows dated on or after the first (sorted) file name.

    Returns
    -------
    None
        Results are written to ``_d_comb`` under ``PCA{0..3}_Returns``
        (per-row factor returns) and ``PCA{0..3}`` (``cumprod(1 + r)``).
    """
    _d_pca_cl, _d_comb, _d_flag = x
    f_files = sorted(_d_pca_cl.listdir())
    comb_ret = _d_comb.retrieve('Returns')

    # newly added: divide every ticker's returns by its 'VOL_CC_240D' series
    vol_frame = _d_comb.retrieve('VOL_CC_240D')
    for tick in comb_ret.tick_cols():
        comb_ret[tick] = comb_ret[tick].values / vol_frame[tick].values

    # Restrict the returns to rows dated on/after the first stored file.
    first_name = f_files[0]
    date_strs = crup.pd_dt_to_str(comb_ret['Date', list])
    keep_rows = [pos for pos, stamp in enumerate(date_strs) if stamp >= first_name]
    comb_ret = comb_ret[keep_rows, :]
    if len(f_files) != len(keep_rows):
        raise NotImplementedError('Something is wrong')

    # One output frame per recorded factor (PCA0 .. PCA3).
    frames = tuple(_dummy_df(comb_ret) for _ in range(4))

    ret_matrix = comb_ret[:, comb_ret.tick_cols()].values.astype('float64')

    # Component rows to record for each flag, checked in order with ``==``.
    flag_table = (
        (0, (0, 1, 2, 8)),
        (1, (0, 1, 18, 2)),
        (2, (0, 1, 2, 3)),
        (3, (0, 1, 2, 19)),
        (4, (0, 1, 2, 3)),
        (5, (0, 1, 2, 4)),
        (6, (0, 18, 15, 4)),
    )
    fallback_rows = (0, 1, 2, 16)  # _d_flag == 7 (and any unlisted value)
    component_rows = next(
        (rows for flag, rows in flag_table if _d_flag == flag),
        fallback_rows,
    )

    for row, fname in enumerate(f_files):
        loadings = _d_pca_cl.load(fname).astype('float64')

        # Drop columns for which every component loading is NaN.
        live_cols = np.where(np.any(~np.isnan(loadings), axis=0))[0]
        loadings = loadings[:, live_cols]

        # Rescale each component so its absolute weights sum to one.
        gross_weight = np.sum(np.abs(loadings), axis=1, keepdims=True)
        loadings = loadings / gross_weight

        day_ret = ret_matrix[row, live_cols]
        # NOTE(review): the 0.01 factor presumably brings the vol-scaled
        # returns back to a return-like magnitude -- confirm with the author.
        factor_ret = np.dot(loadings, day_ret) * 0.01

        for frame, comp in zip(frames, component_rows):
            frame[row, 'PCA'] = factor_ret[comp]

    # Persist the raw factor returns first ...
    return_keys = ('PCA0_Returns', 'PCA1_Returns', 'PCA2_Returns', 'PCA3_Returns')
    for key, frame in zip(return_keys, frames):
        _d_comb.store(key, frame)

    # ... then overwrite the column with the compounded series and persist that.
    for frame in frames:
        frame['PCA'] = np.cumprod(1 + frame['PCA'].values)
    level_keys = ('PCA0', 'PCA1', 'PCA2', 'PCA3')
    for key, frame in zip(level_keys, frames):
        _d_comb.store(key, frame)
    return None