def _get_alt_price(i):
    """Compute and store the alternating smoothed price columns for market ``i``.

    Reads the ``'Stats'/'Returns'`` table of ``i`` through ``mkt_retrieve`` and,
    for every smoothing setting in the module-level ``_smth`` and every horizon
    ``days`` in ``1.._days``, splits the rows into ``days`` interleaved phases.
    Each phase is passed to ``filt.smth_price`` with the open taken
    ``days - 1`` rows before the matching close. The phases are then combined
    with ``filt.sma`` and ``filt.fill`` and written to a column named
    ``D<days>S<smth>`` (both zero-padded to two characters when below 10).
    The result, without the ``'Contract'`` column, is saved with ``mkt_store``
    under ``'Stats'/'AltReturns'``.

    Args:
        i: market identifier passed to ``mkt_retrieve`` / ``mkt_store``.
    """
    print('Calculating alternating prices for %s ' % i)
    returns = mkt_retrieve(i, 'Stats', 'Returns')
    open_px = returns['Open'].values
    close_px = returns['Close'].values
    alt_prices = returns[['Contract', 'Date']]

    def _pad2(value):
        # Prefix a single '0' when the value is below 10, as in D01 / S06.
        return ('0' if value < 10 else '') + str(value)

    for smth in _smth:  # range(6, _smth + 1, 3)
        params = smth_param(smth)
        for days in range(1, _days + 1):
            # presumably the index of the first usable close -- TODO confirm
            first = filt.fst_nan(close_px)
            n_obs = len(close_px)
            # 99 marks rows that belong to no phase (before the first full block).
            phase = np.ones(n_obs, dtype=int) * 99
            alt = np.zeros(n_obs) * np.nan
            for k in range(days):
                phase[first + days + k - 1:n_obs:days] = k
            for k in range(days):
                rows = np.where(phase == k)[0]
                closes = close_px[list(rows)]
                # Open of the row `days - 1` steps before each selected close.
                opens = open_px[list(rows - days + 1)]
                alt[list(rows)] = filt.smth_price(
                    opens, closes,
                    params[0], params[1], params[2], params[3], 0, params[4])
            alt = filt.sma(alt, days)
            alt = filt.fill(alt)
            alt_prices['D' + _pad2(days) + 'S' + _pad2(smth)] = alt

    del alt_prices['Contract']
    mkt_store(i, 'Stats', 'AltReturns', alt_prices)
def refresh_correl(_window=240):
    """Recompute the rolling pairwise Kendall tau table and store it.

    Close prices are converted with ``filt.ret`` and divided by the
    ``'vol_gk240'`` series. For every unordered pair of ticker columns a
    column ``'<first>_<second>'`` is filled row by row with
    ``bst.kendall_tau`` evaluated on the ``_window`` rows that precede the
    row being written (the row itself is not part of the window). The table
    is stored under ``univ_ib_eqidx_ext + 'Correl'``.

    Args:
        _window: number of rows in each rolling window (default 240).

    Returns:
        None.
    """
    closes = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')
    vols = cr_cret.retrieve(univ_ib_eqidx_ext + 'vol_gk240')
    tickers = closes.tick_cols()
    for tick in tickers:
        closes[tick] = filt.ret(closes[tick].values) / vols[tick].values

    corr = dummy_df(closes)
    n_rows = corr.shape[0]
    n_tick = len(tickers)
    # need to make this code parallel
    for a in range(n_tick - 1):
        for b in range(a + 1, n_tick):
            first, second = tickers[a], tickers[b]
            pair = first + '_' + second
            print('Processing rolling correlation of %s and %s' % (first, second))
            corr[pair] = np.zeros(n_rows) * np.nan
            x = closes[second].values
            y = closes[first].values
            # Start once both series are available for a full window.
            start = np.maximum(filt.fst_nan(x), filt.fst_nan(y))
            for row in range(_window + start, n_rows):
                lo = row - _window
                corr[row, pair] = bst.kendall_tau(x[lo:row], y[lo:row])

    cr_cret.store(univ_ib_eqidx_ext + 'Correl', corr)
    return None
def get_alt_price_weekly():
    """Build three weekly-phased smoothed price tables and store them.

    For every ticker column, only rows where the ``'ExchOpen'`` flag equals 1
    are used. Those rows are split into five interleaved phases; each phase is
    passed to ``filt.smth_price`` (open taken four rows before the close) with
    three fixed argument sets, the phases are recombined with
    ``filt.sma(..., 5)`` and written back at the flagged rows. Tickers with no
    flagged rows get NaN columns. The tables are stored as ``'AdjSer20W'``,
    ``'AdjSer30W'`` and ``'AdjSer40W'``.
    """
    open_df = cr_cret.retrieve(univ_ib_eqidx_ext + "Open")
    close_df = cr_cret.retrieve(univ_ib_eqidx_ext + "Close")
    tick_cols = close_df.tick_cols()
    exch_open = cr_cret.retrieve(univ_ib_eqidx_ext + "ExchOpen")

    # (store name, arguments passed to filt.smth_price after open/close)
    specs = (
        ("AdjSer20W", (120, 10, 10, 6, 1, 13)),
        ("AdjSer30W", (180, 15, 10, 8, 1, 13)),
        ("AdjSer40W", (140, 30, 10, 8, 1, 16)),
    )
    # Outputs start as copies of the close table; rows not flagged stay as-is.
    outputs = [close_df.copy() for _ in specs]

    for i in tick_cols:  # i = 'SP500'
        z = np.where(exch_open[i].values.astype("int") == 1)[0]
        if z.shape[0] == 0:
            for frame in outputs:
                frame[i] = np.nan
            continue

        closes = close_df[i].values[z]
        opens = open_df[i].values[z]
        # convert to weekly
        nb = filt.fst_nan(closes)
        ne = len(closes)
        phase = np.ones(ne) * 99  # 99 = row not assigned to any phase
        buffers = [np.zeros(ne) * np.nan for _ in specs]
        for k in range(5):
            phase[nb + 4 + k:ne:5] = k
        phase = phase.astype("int")

        for k in range(5):
            zk = np.where(phase == k)[0]
            phase_close = closes[list(zk)]
            phase_open = opens[list(zk - 4)]
            smoothed = [
                filt.smth_price(phase_open, phase_close, *args)
                for _, args in specs
            ]
            for buf, values in zip(buffers, smoothed):
                buf[list(zk)] = values

        averaged = [filt.sma(buf, 5) for buf in buffers]
        for frame, values in zip(outputs, averaged):
            frame[list(z), i] = values

    for (store_name, _), frame in zip(specs, outputs):
        cr_cret.store(univ_ib_eqidx_ext + store_name, frame)
def check_price_forecast():
    """Exploratory check of the stored ``AdjSerD..S..`` series against SP500.

    Loads the SP500 close and ``'vol_pb_120'`` series on rows where the
    ``'ExchOpen'`` flag is 1, loads the 280 ``AdjSerD<days>S<smth>`` series
    (``days`` 1..10, ``smth`` 5..32), trims everything to the first row where
    all inputs are available, prints ``smart_kendall`` of the sign of the
    ``nlag``-row close change against every series, and plots one series
    together with cumulative return curves.

    All series used together after trimming are sliced with the same ``n1``
    offset, so the dates, the close returns and the selected signal keep equal
    lengths.
    """
    exch_open = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')
    univ_ib_dt = exch_open['Date'].values
    univ_ib_gd = exch_open['SP500'].values
    z = np.where(univ_ib_gd.astype('int') == 1)[0]
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')['SP500'].values[z]
    univ_ib_vl = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')['SP500'].values[z]
    univ_ib_dt = univ_ib_dt[z]
    nlag = 2

    # n1 ends up as the largest filt.fst_nan over close, vol and every signal.
    n1 = np.maximum(filt.fst_nan(univ_ib_cl), filt.fst_nan(univ_ib_vl))

    names = []
    sig_rows = []
    for days in range(1, 11):
        for smth in range(5, 33):
            key = 'AdjSerD%02dS%02d' % (days, smth)
            tmp = cr_aret.retrieve(univ_ib_eqidx_ext + key)['SP500'].values[z]
            sig_rows.append(tmp)
            names.append('%02d_%02d' % (days, smth))
            n1 = np.maximum(n1, filt.fst_nan(tmp))
    # Stack once instead of growing the matrix with vstack on every iteration.
    univ_ib_sig = np.vstack(sig_rows)

    univ_ib_cl_ = np.sign(filt.chg(univ_ib_cl, nlag))[n1:]
    univ_ib_sig = univ_ib_sig[:, n1:]
    for name, sig_row in zip(names, univ_ib_sig):
        print(name, ':', smart_kendall(univ_ib_cl_, sig_row))

    # Row 167 is the series for days=6, smth=32 (index = (days-1)*28 + smth-5).
    pick = 167
    picked_sig = univ_ib_sig[pick, :]
    plot_ts_new(univ_ib_dt, univ_ib_cl)
    # The signal matrix was trimmed by n1, so the dates must be trimmed too.
    plot_ts_new(univ_ib_dt[n1:], picked_sig)

    t1 = filt.ret(filt.lag(picked_sig))
    t1_ = filt.ret(picked_sig)
    # Trim the close returns by n1 so they align element-wise with t1.
    t2 = filt.ret(univ_ib_cl)[n1:]
    t3 = t2 * np.sign(t1)
    t1_, t2, t3 = reduce_nonnan(t1_, t2, t3)
    ct1_ = np.cumprod(1 + t1_) * 100
    ct2 = np.cumprod(1 + t2) * 100
    # NOTE(review): the strategy curve compounds 1 - t3 (sign flipped) -- confirm intent.
    ct3 = np.cumprod(1 - t3) * 100

    f = pyl.figure(1)
    f.clear()
    pyl.subplot(3, 1, 1)
    pyl.semilogy(ct2[-5000:])
    pyl.subplot(3, 1, 2)
    pyl.semilogy(ct1_[-5000:])
    pyl.subplot(3, 1, 3)
    pyl.semilogy(ct3[-5000:])
# NOTE(review): this fragment has no visible enclosing definition. It reads
# `i`, `test2`, `testv`, `_sig_set_95_ch9` and `_eq_idx`, which are defined
# outside this view; presumably it is the body of a per-market loop -- confirm.

# Append the 'Changes9' signal columns (renamed with a '_CH9' suffix) to test2.
test2_ = mkt_retrieve(i, 'MovReg', 'Changes9')[['Date']+_sig_set_95_ch9]
test2_.set_columns(['Date']+[k + '_CH9' for k in _sig_set_95_ch9])
test2 = DataFrame.merge(test2, test2_, on='Date')
tcl2 = test2.tick_cols()
# Lag every signal column and divide it by testv.
for k in tcl2:
    test2[k] = filt.lag(test2[k].values)/testv
# get the returns
test1 = mkt_retrieve(i, 'Stats', 'Returns')[['Date', 'Close']]
test1['Returns'] = filt.ret(test1['Close'].values)/testv
del test1['Close']
tcl2 = test2.tick_cols()
# ny = largest filt.fst_nan over the returns and all signal columns; both
# tables are cut to start at that row.
ny = filt.fst_nan(test1['Returns'].values)
for k in tcl2:
    ny = np.maximum(ny, filt.fst_nan(test2[k].values))
test1 = test1[ny:, :]
test2 = test2[ny:, :]
# The first market seeds the pooled tables; later markets are row-bound to them.
if i == _eq_idx[0]:
    test1_all = test1.copy()
    test2_all = test2.copy()
else:
    test1_all.row_bind(test1)
    test2_all.row_bind(test2)
print('Processed ticker %s and size of dataframe is %s' % (i, str(test1_all.shape[0])))
del test1
def get_cs_factor_portfolios():
    """Exploratory cross-sectional back-test of one stored signal.

    Steps, as implemented below:

    1. Convert closes to returns with ``filt.ret``; lag the volatility, the
       signal and the two PCA beta tables by one step with ``filt.lag``.
    2. Pick the first row at which at least 20 markets are live, where a
       market is live from the larger of ``filt.fst_nan`` of its signal and
       of its PCA1 beta. If that never happens the earliest live row is used.
    3. Per row, rank-normalise ``signal / vol`` with ``half_norm_rankit``
       (columns H1..H3) and ``full_norm_rankit`` (F1..F3).
    4. Per row, variants 2/5 have the PCA1 beta projected out and variants 3/6
       have both PCA betas projected out. Each weight vector is scaled to unit
       absolute sum and applied to returns levered by ``0.005 / vol``.
    5. Print and plot summary statistics for the six return columns.

    NOTE(review): the final plotting section reads ``bk_test1``, ``bk_test2``
    and ``bk_test3``, which this function never defines; it only works if
    those names exist at module level. It is kept unchanged.
    """
    # just testing here
    # read the closing prices, convert it to returns
    _cls = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')
    tcl = _cls.tick_cols()
    for j in tcl:
        _cls[j] = filt.ret(_cls[j].values)

    # read the volatility, signal, pca beta, lag them
    _vol = cr_cret.retrieve(univ_ib_eqidx_ext + 'vol_gk240')
    _sig = cr_sret.retrieve(univ_ib_eqidx_ext + 'D10S26_521_QRB_LVL')
    # _sig = cr_sret.retrieve(univ_ib_eqidx_ext + _all_signals_p1_55[2])
    _pca1 = cr_cret.retrieve(univ_ib_eqidx_ext + 'PCA1_Beta')
    _pca2 = cr_cret.retrieve(univ_ib_eqidx_ext + 'PCA2_Beta')
    for j in tcl:
        _sig[j] = filt.lag(_sig[j].values)
        _pca1[j] = filt.lag(_pca1[j].values)
        _pca2[j] = filt.lag(_pca2[j].values)
        _vol[j] = filt.lag(_vol[j].values)

    # find starting point, at least 20 markets are live (from both beta, sig)
    nc = len(tcl)
    ny = np.zeros(nc)
    for j in range(nc):
        ny[j] = np.maximum(filt.fst_nan(_sig[tcl[j]].values),
                           filt.fst_nan(_pca1[tcl[j]].values))
    _ny_mn = int(np.min(ny))
    _ny_mx = int(np.max(ny))
    _ny = _ny_mn
    # A market is live at row j once its first valid row is <= j. Counting
    # `ny >= j` would count markets that are NOT yet live and always stop at
    # the first row. The upper bound is inclusive: every market is live there.
    for j in range(_ny_mn, _ny_mx + 1):
        if int(np.sum(ny <= j)) >= 20:
            _ny = j
            break

    # cross-sectionally normalize the signal
    _sign1 = _sig.copy()
    _sign2 = _sig.copy()
    _sign3 = _sig.copy()
    _sign4 = _sig.copy()
    _sign5 = _sig.copy()
    _sign6 = _sig.copy()
    for _frame in (_sign1, _sign2, _sign3, _sign4, _sign5, _sign6):
        for j in tcl:
            _frame[j] = np.nan
    for j in range(_ny, _sig.shape[0]):
        _tmp = _sig[j, tcl] / _vol[j, tcl]
        _sign1[j, tcl] = half_norm_rankit(_tmp)
        _sign2[j, tcl] = _sign1[j, tcl].values
        _sign3[j, tcl] = _sign1[j, tcl].values
        _sign4[j, tcl] = full_norm_rankit(_tmp)
        _sign5[j, tcl] = _sign4[j, tcl].values
        _sign6[j, tcl] = _sign4[j, tcl].values

    # calculate returns using risk parity portfolio approaches
    bk_test = dummy_df(_vol)
    for _col in ('H1', 'H2', 'H3', 'F1', 'F2', 'F3'):
        bk_test[_col] = np.nan
    for j in range(_ny, _sig.shape[0]):
        # j = _ny
        _sign1_tmp = _sign1[j:j, tcl].values
        _sign2_tmp = _sign2[j:j, tcl].values
        _sign3_tmp = _sign3[j:j, tcl].values
        _sign4_tmp = _sign4[j:j, tcl].values
        _sign5_tmp = _sign5[j:j, tcl].values
        _sign6_tmp = _sign6[j:j, tcl].values
        _cls_tmp = _cls[j:j, tcl].values
        _vol_tmp = _vol[j:j, tcl].values
        _pca1_tmp = _pca1[j:j, tcl].values
        _pca2_tmp = _pca2[j:j, tcl].values
        # presumably keeps only the markets where every input is non-NaN -- TODO confirm
        (_sign1_tmp, _sign2_tmp, _sign3_tmp, _sign4_tmp, _sign5_tmp,
         _sign6_tmp, _cls_tmp, _vol_tmp, _pca1_tmp, _pca2_tmp) = reduce_nonnan(
            _sign1_tmp, _sign2_tmp, _sign3_tmp, _sign4_tmp, _sign5_tmp,
            _sign6_tmp, _cls_tmp, _vol_tmp, _pca1_tmp, _pca2_tmp)

        # Projections of the signals on the PCA beta vectors.
        _tmp11 = np.dot(_pca1_tmp, _pca1_tmp)
        _tmp22 = np.dot(_pca2_tmp, _pca2_tmp)
        _tmps11 = np.dot(_sign1_tmp, _pca1_tmp)
        _tmps12 = np.dot(_sign1_tmp, _pca2_tmp)
        _tmps21 = np.dot(_sign4_tmp, _pca1_tmp)
        _tmps22 = np.dot(_sign4_tmp, _pca2_tmp)
        _sign2_tmp = _sign2_tmp - _pca1_tmp * _tmps11 / _tmp11
        _sign5_tmp = _sign5_tmp - _pca1_tmp * _tmps21 / _tmp11
        _sign3_tmp = (_sign3_tmp - _pca1_tmp * _tmps11 / _tmp11
                      - _pca2_tmp * _tmps12 / _tmp22)
        _sign6_tmp = (_sign6_tmp - _pca1_tmp * _tmps21 / _tmp11
                      - _pca2_tmp * _tmps22 / _tmp22)

        # Scale every weight vector to unit absolute sum.
        _sign1_tmp = _sign1_tmp / np.sum(np.abs(_sign1_tmp))
        _sign2_tmp = _sign2_tmp / np.sum(np.abs(_sign2_tmp))
        _sign3_tmp = _sign3_tmp / np.sum(np.abs(_sign3_tmp))
        _sign4_tmp = _sign4_tmp / np.sum(np.abs(_sign4_tmp))
        _sign5_tmp = _sign5_tmp / np.sum(np.abs(_sign5_tmp))
        _sign6_tmp = _sign6_tmp / np.sum(np.abs(_sign6_tmp))

        _lev_tmp = 0.005 / _vol_tmp
        _ret_tmp = _cls_tmp * _lev_tmp
        bk_test[j, 'H1'] = np.dot(_sign1_tmp, _ret_tmp)
        bk_test[j, 'H2'] = np.dot(_sign2_tmp, _ret_tmp)
        bk_test[j, 'H3'] = np.dot(_sign3_tmp, _ret_tmp)
        bk_test[j, 'F1'] = np.dot(_sign4_tmp, _ret_tmp)
        bk_test[j, 'F2'] = np.dot(_sign5_tmp, _ret_tmp)
        bk_test[j, 'F3'] = np.dot(_sign6_tmp, _ret_tmp)
    print('done')

    _dt = bk_test['Date'].values[_ny:]
    _figures = []
    for _fig_no, _cols in ((1, ['H1', 'H2', 'H3']), (2, ['F1', 'F2', 'F3'])):
        _figures.append(plt.figure(_fig_no))
        for jidx, j in enumerate(_cols):
            _sh = np.nanmean(bk_test[j].values) * 16 / np.nanstd(bk_test[j].values)
            # A column with a non-positive ratio is reported with its sign flipped.
            if _sh > 0:
                _mx = avg_drawdown(bk_test[_ny:, j].values) / (16 * np.std(bk_test[_ny:, j].values))
                testh = conv_to_price(bk_test[j].values)
            else:
                _mx = avg_drawdown(-bk_test[_ny:, j].values) / (16 * np.std(bk_test[_ny:, j].values))
                testh = conv_to_price(-bk_test[j].values)
                _sh = -_sh
            print([_sh, _mx])
            plt.subplot(3, 1, jidx + 1)
            plot_ts_new(_dt, testh[_ny:])
    f1, f2 = _figures

    _bk_test = bk_test[_ny:, bk_test.tick_cols()].values
    print(np.corrcoef(_bk_test.T))

    # bk_test3 = bk_test.copy()
    # NOTE(review): bk_test1 / bk_test2 / bk_test3 are not defined in this
    # function; the section below raises NameError unless they are globals.
    f1.clear()
    f2.clear()
    plt.subplot(3, 1, 1)
    tmp = bk_test1[:, 'F1'].values
    plot_ts_new(_dt, conv_to_price(tmp)[_ny:])
    print(np.nanmean(tmp)*16/np.nanstd(tmp))
    plt.subplot(3, 1, 2)
    tmp = bk_test2[:, 'F1'].values
    plot_ts_new(_dt, conv_to_price(tmp)[_ny:])
    print(np.nanmean(tmp)*16/np.nanstd(tmp))
    plt.subplot(3, 1, 3)
    tmp = 0.5 * -bk_test3[:, 'F1'].values + 0.5 * bk_test2[:, 'F2'].values
    plot_ts_new(_dt, conv_to_price(tmp)[_ny:])
    print(np.nanmean(tmp) * 16 / np.nanstd(tmp))
def refresh_pca():
    """Recompute PCA loadings and betas from the smoothed correlation table.

    Per row, starting at the first row where more than 75% of the liquid
    tickers (``_tcl_liq``) have a ``'vol_gk240'`` value:

    * rebuild the symmetric correlation matrix of the liquid tickers from the
      ``'CorrelSmooth'`` pair columns, dropping tickers with no correlation
      value at all;
    * take the first two right-singular vectors of that matrix
      (``np.linalg.svd``) as loadings, with the first flipped to a
      non-negative sum and the second flipped to agree with the previous row;
    * record ``v' M v`` for both vectors.

    The loadings are stored as ``'PCA1_CorrLoads'`` / ``'PCA2_CorrLoads'``.
    For every ticker column of the volatility table, the beta is then the sum
    over liquid tickers of loading times correlation with that ticker, divided
    by ``sqrt(v' M v)``. The betas are stored as ``'PCA1_Beta'`` /
    ``'PCA2_Beta'``.
    """
    correl1 = cr_cret.retrieve(univ_ib_eqidx_ext+'CorrelSmooth')
    _correl_ = dummy_df(correl1)
    nc = len(_tcl_liq)
    for k in range(nc - 1):
        for m in range(k + 1, nc):
            _pair = _tcl_liq[k]+'_'+_tcl_liq[m]
            _correl_[_pair] = correl1[_pair].values

    _vol = cr_cret.retrieve(univ_ib_eqidx_ext+'vol_gk240')
    n = _vol.shape[0]
    # First row with more than 75% of the liquid contracts having a vol value.
    _ny = np.zeros(nc)
    for j in range(nc):
        _ny[j] = filt.fst_nan(_vol[_tcl_liq[j]].values)
    ny = int(np.min(_ny))
    nc1 = int(nc*0.75)
    for j in range(ny, n):
        if np.where(_ny <= j)[0].shape[0] > nc1:
            ny = j
            break

    _pca1 = dummy_df(_vol)
    for j in _tcl_liq:
        _pca1[j] = np.nan
    _pca2 = _pca1.copy()
    _pca_den = dummy_df(_vol)
    _pca_den['PCA1_Den'] = np.nan
    _pca_den['PCA2_Den'] = np.nan

    for j in range(ny, n):
        # recreate the correlation matrix, j = ny
        _correl = _correl_[j:j, :]
        _mat = np.zeros((nc, nc))*np.nan
        for k in range(nc - 1):
            for m in range(k + 1, nc):
                _mat[k, m] = _correl[0, _tcl_liq[k]+'_'+_tcl_liq[m]]
                _mat[m, k] = _mat[k, m]

        # mark out stocks with no correlation values; the matrix is symmetric
        # by construction, so checking the off-diagonal row entries suffices
        c_idx = [k for k in range(nc)
                 if not np.all(np.isnan(np.delete(_mat[k, :], k)))]
        tcl2 = [_tcl_liq[k] for k in c_idx]
        if len(tcl2) < nc1:
            # NOTE(review): this stops ALL later rows at the first sparse row;
            # confirm that `break` rather than `continue` is intended.
            break

        # reduce the correlation matrix
        _mat = _mat[c_idx, :]
        _mat = _mat[:, c_idx]
        np.fill_diagonal(_mat, 1)

        # get pca loadings; rows of the third svd output are the eigenvectors
        _veig = np.linalg.svd(_mat)[2]
        if np.sum(_veig[0, :]) < 0:
            _veig[0, :] = -_veig[0, :]
        if j > ny:
            # Keep the second vector's orientation consistent with the previous row.
            _tmp1, _tmp2 = reduce_nonnan(_veig[1, :], _pca2[j-1, tcl2].values)
            if np.dot(_tmp1, _tmp2) < 0:
                _veig[1, :] = -_veig[1, :]

        # save the pca factors
        _pca1[j, tcl2] = _veig[0, :]
        _pca2[j, tcl2] = _veig[1, :]
        _pca_den[j, 'PCA1_Den'] = np.dot(_veig[0, :], np.dot(_mat, _veig[0, :]))
        _pca_den[j, 'PCA2_Den'] = np.dot(_veig[1, :], np.dot(_mat, _veig[1, :]))

    cr_cret.store(univ_ib_eqidx_ext + 'PCA1_CorrLoads', _pca1)
    cr_cret.store(univ_ib_eqidx_ext + 'PCA2_CorrLoads', _pca2)
    _pca_den['PCA1_Den'] = np.sqrt(_pca_den['PCA1_Den'].values)
    _pca_den['PCA2_Den'] = np.sqrt(_pca_den['PCA2_Den'].values)

    # Zero the loadings where they are missing (from row ny on) so that those
    # tickers drop out of the beta sums instead of turning them into NaN.
    _pca1_ = _pca1.copy()
    _pca2_ = _pca2.copy()
    _chk_idx = []
    tcl2 = _pca1_.tick_cols()
    for m in tcl2:
        _tmp = np.where(np.isnan(_pca1[ny:, m]))[0]
        _chk_idx.append(_tmp)
        if _tmp.shape[0] > 0:
            _pca1_[_tmp+ny, m] = 0
            _pca2_[_tmp+ny, m] = 0

    tcl = _vol.tick_cols()
    # Set for constant-time pair-name lookups inside the nested loop below.
    tcl_cr = set(correl1.tick_cols())
    _pca1_beta_ = dummy_df(_vol)
    _pca2_beta_ = dummy_df(_vol)
    for k in tcl:
        # k = tcl[0]
        _corr_i = dummy_df(_vol)
        for midx, m in enumerate(tcl2):
            # m = 'SPToronto60'
            if m == k:
                _corr_i[m] = 1.0
            elif (m + '_' + k) in tcl_cr:
                _corr_i[m] = correl1[m + '_' + k].values
            else:
                _corr_i[m] = correl1[k + '_' + m].values
            if _chk_idx[midx].shape[0] > 0:
                _corr_i[_chk_idx[midx] + ny, m] = 0
            if midx == 0:
                _pca1_beta_[k] = _pca1_[m].values * _corr_i[m].values
                _pca2_beta_[k] = _pca2_[m].values * _corr_i[m].values
            else:
                _pca1_beta_[k] = _pca1_beta_[k].values + _pca1_[m].values * _corr_i[m].values
                _pca2_beta_[k] = _pca2_beta_[k].values + _pca2_[m].values * _corr_i[m].values
        _pca1_beta_[k] = _pca1_beta_[k].values/_pca_den['PCA1_Den'].values
        _pca2_beta_[k] = _pca2_beta_[k].values/_pca_den['PCA2_Den'].values

    cr_cret.store(univ_ib_eqidx_ext + 'PCA1_Beta', _pca1_beta_)
    cr_cret.store(univ_ib_eqidx_ext + 'PCA2_Beta', _pca2_beta_)
def fst_nan(x):
    """Module-level shortcut that forwards ``x`` to ``filt.fst_nan``.

    Args:
        x: value handed unchanged to ``filt.fst_nan``.

    Returns:
        Whatever ``filt.fst_nan(x)`` returns.
    """
    result = filt.fst_nan(x)
    return result