def gradient_boosting_result(_ticker='SP500'):
    """For one ticker, load every stored signal variant and print, per signal,
    the median absolute residual of a rolling LAD regression of returns on the
    lagged signal, for several lookbacks (raw and volatility-scaled variants).

    Parameters
    ----------
    _ticker : str
        Column name to pull from the retrieved frames (default 'SP500').
    """
    # Exchange-open flag; z = row indices where the exchange was open.
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')[_ticker].values
    z = list(np.where(univ_ib_gd.astype('int') == 1)[0])
    # Signal-name components: price input, transform, filter, horizon.
    _prc_names = ['CLS', 'A2D', 'A3D', 'A4D', 'A2W', 'A3W', 'A4W', 'A2M', 'A3M', 'A4M']
    _trn_names = ['LVL', 'CH0', 'CH1', 'CH2']
    _fil_names = ['LRB', 'QRB', 'QRG']
    # Horizons 005..300 in steps of 5, zero-padded to three characters.
    _hoz_names = []
    for j in range(5, 305, 5):
        if j < 10:
            _hoz_names.append('00' + str(j))
        elif j < 100:
            _hoz_names.append('0' + str(j))
        else:
            _hoz_names.append(str(j))
    # get volatility forecast
    univ_ib_vl = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')[_ticker].values[z]
    # get return
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')[_ticker].values[z]
    univ_ib_cl = filt.ret(univ_ib_cl)
    # Column-bind every signal variant into one frame, one column per
    # (price, filter, transform, horizon) combination.
    univ_ib_sig_all = None
    for k in _trn_names:
        for i in _prc_names:
            for j in _fil_names:
                for mi, m in enumerate(_hoz_names):
                    fn = univ_ib_eqidx_ext+i+'_'+j+'_'+k+'_'+m
                    univ_ib_sig = cr_sig_mr_sg.retrieve(fn)[_ticker].values[z]
                    if univ_ib_sig_all is None:
                        univ_ib_sig_all = DataFrame({i+'_'+j+'_'+k+'_'+m: univ_ib_sig})
                    else:
                        univ_ib_sig_all.col_bind(DataFrame({i+'_'+j+'_'+k+'_'+m: univ_ib_sig}))
    # just for storage - incase something happens
    # univ_ib_gb = DataFrame({'Close': univ_ib_cl, 'Volatility': univ_ib_vl})
    # univ_ib_gb.col_bind(univ_ib_sig_all)
    # cr_cret.store(univ_ib_eqidx_ext + 'GBM', univ_ib_gb)
    reg_lookback = [120, 240, 360, 480]
    new_col_names = list(univ_ib_sig_all.columns)
    for i in new_col_names:
        # i = new_col_names[0]
        test_ = np.empty(0)
        for j in reg_lookback:
            # j = reg_lookback[0]
            uic = univ_ib_cl
            uiv = univ_ib_vl
            # Lag the signal by one day so it only uses prior information.
            uis = filt.lag(univ_ib_sig_all[i].values, 1)
            uic, uis, uiv = reduce_nonnan(uic, uis, uiv)
            # Rolling LAD beta: raw (b1) and volatility-deflated (b2).
            b1 = qreg.roll_e_ladreg_1d(uic, uis, j)
            b2 = qreg.roll_e_ladreg_1d(uic/uiv, uis/uiv, j)
            # Out-of-sample residuals: betas are lagged before being applied.
            resid1 = uic - filt.lag(b1)*uis
            resid2 = uic - filt.lag(b2)*uis
            resid1, resid2 = reduce_nonnan(resid1, resid2)
            test_ = np.hstack((test_, np.array([np.median(abs(resid1)), np.median(abs(resid2))])))
        # One line per signal: pairs of (raw, vol-scaled) MAD residuals per lookback.
        print(i+' : '+np_to_str(test_))
def response_curve(x1, _ticker='SP500', f=None, md=True, sigd=False):
    """Plot the binned response of vol-normalized returns to a lagged signal.

    The signal is bucketed into ``_bins + 1`` overlapping bins spanning the
    central 98% of its distribution; for each bin the median (or mean) of the
    associated returns is plotted, repeated for 16 increasing signal lags in a
    4x4 subplot grid.

    Parameters
    ----------
    x1 : str
        Signal name suffix retrieved via ``cr_sig_mr_sg``.
    _ticker : str
        Ticker column to use (default 'SP500').
    f : int or None
        Matplotlib figure number; defaults to figure 1.
    md : bool
        If True use the median per bin, otherwise the mean.
    sigd : bool
        If True divide the per-bin statistic by the per-bin std deviation.

    Bug fix: the top-bin branch previously tested ``j == _bins + 1``, which is
    unreachable (``j`` ranges over ``0.._bins``), so returns for signal values
    above the last bin edge were silently dropped; it now tests ``j == _bins``.
    """
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')[_ticker].values
    univ_ib_vl = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')[_ticker].values
    univ_ib_s1 = cr_sig_mr_sg.retrieve(univ_ib_eqidx_ext + x1)[_ticker].values
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')[_ticker].values
    # Restrict everything to exchange-open rows.
    z = list(np.where(univ_ib_gd.astype('int') == 1)[0])
    univ_ib_s1 = univ_ib_s1[z]
    univ_ib_cl = univ_ib_cl[z]
    univ_ib_vl = univ_ib_vl[z]
    # Normalize returns and the (1-day lagged) signal by lagged volatility.
    univ_ib_cl = filt.ret(univ_ib_cl)/filt.lag(univ_ib_vl, 1)
    univ_ib_s1 = filt.lag(univ_ib_s1, 1)/filt.lag(univ_ib_vl, 1)
    univ_ib_cl, univ_ib_s1 = reduce_nonnan(univ_ib_cl, univ_ib_s1)
    _bins = 20
    # Symmetric plotting range covering the central 98% of the signal.
    _range = np.maximum(np.percentile(univ_ib_s1, 99), -np.percentile(univ_ib_s1, 1))
    _delta = _range/_bins
    if f is not None:
        pyl.figure(f)
    else:
        pyl.figure(1)
    for i in range(0, 16):
        if i == 0:
            uis1 = univ_ib_s1
            uic1 = univ_ib_cl
        else:
            # Lag the signal i more days; smooth returns over i+1 days.
            uis1 = filt.lag(univ_ib_s1, i)
            uic1 = filt.sma(univ_ib_cl, i+1)
            uis1, uic1 = reduce_nonnan(uis1, uic1)
        uis1_b = np.linspace(-_range, _range, num=_bins+1)
        uic1_b = np.zeros(_bins+1)*np.nan
        for j in range(0, _bins+1):
            if j == 0:
                # Bottom bin is open on the left: everything below the edge.
                tmp__ = np.where(uis1 <= uis1_b[j]+_delta)[0]
            elif j == _bins:
                # Top bin is open on the right: everything above the edge.
                # (Was `j == _bins+1`, an unreachable condition.)
                tmp__ = np.where(uis1 > uis1_b[j]-_delta)[0]
            else:
                tmp__ = np.where((uis1 <= uis1_b[j]+_delta) & (uis1 > uis1_b[j]-_delta))[0]
            if tmp__.shape[0] > 0:
                # Central statistic per bin, optionally std-deflated.
                _stat = np.nanmedian(uic1[tmp__]) if md else np.nanmean(uic1[tmp__])
                if sigd:
                    _stat = _stat/np.nanstd(uic1[tmp__])
                uic1_b[j] = _stat
        pyl.subplot(4, 4, i+1)
        pyl.plot(uis1_b, uic1_b)
def return_stats_for_various_vol():
    """Print the Spearman correlation between lagged volatility estimates and
    absolute daily returns, conditioned on the sign of the trailing 27-day
    return, for every volatility estimator and lookback (30..300 by 30).
    """
    univ_ib_cl = cr_cret.retrieve(univ_ib_ext + "Close")
    # Copies used to compute trailing returns over three horizons.
    univ_ib_rt1 = univ_ib_cl.copy()
    univ_ib_rt2 = univ_ib_cl.copy()
    univ_ib_rt3 = univ_ib_cl.copy()
    for i in univ_ib_cl.tick_cols():
        univ_ib_cl[i] = filt.ret(univ_ib_cl[i].values)
    # Trailing returns are lagged one day so they only use prior data.
    for i in univ_ib_rt1.tick_cols():
        univ_ib_rt1[i] = filt.ret(univ_ib_rt1[i].values, 27)
        univ_ib_rt1[i] = filt.lag(univ_ib_rt1[i].values)
    for i in univ_ib_rt2.tick_cols():
        univ_ib_rt2[i] = filt.ret(univ_ib_rt2[i].values, 80)
        univ_ib_rt2[i] = filt.lag(univ_ib_rt2[i].values)
    for i in univ_ib_rt3.tick_cols():
        univ_ib_rt3[i] = filt.ret(univ_ib_rt3[i].values, 240)
        univ_ib_rt3[i] = filt.lag(univ_ib_rt3[i].values)
    # Flatten all tickers into single 1-D vectors for a pooled correlation.
    univ_ib_cl_data = univ_ib_cl[univ_ib_cl.tick_cols()].values.reshape(-1)
    univ_ib_rt1_data = univ_ib_rt1[univ_ib_rt1.tick_cols()].values.reshape(-1)
    univ_ib_rt2_data = univ_ib_rt2[univ_ib_rt2.tick_cols()].values.reshape(-1)
    univ_ib_rt3_data = univ_ib_rt3[univ_ib_rt3.tick_cols()].values.reshape(-1)
    univ_ib_cl_data = np.abs(univ_ib_cl_data)
    import warnings
    # np.sign on NaNs raises RuntimeWarning; silence it temporarily.
    warnings.simplefilter("ignore", RuntimeWarning)
    univ_ib_rt1_data = np.sign(univ_ib_rt1_data)
    univ_ib_rt2_data = np.sign(univ_ib_rt2_data)
    univ_ib_rt3_data = np.sign(univ_ib_rt3_data)
    warnings.simplefilter("default", RuntimeWarning)
    for j in vol_names:
        for k in range(30, 330, 30):
            # Lookbacks < 100 are zero-padded in the stored key name.
            if k < 100:
                univ_ib_vol = cr_vol_all.retrieve(univ_ib_ext + j + "_0" + str(k) + "D")
            else:
                univ_ib_vol = cr_vol_all.retrieve(univ_ib_ext + j + "_" + str(k) + "D")
            for i in univ_ib_vol.tick_cols():
                univ_ib_vol[i] = filt.lag(univ_ib_vol[i].values)
            univ_ib_vol_data = univ_ib_vol[univ_ib_vol.tick_cols()].values.reshape(-1)
            # univ_ib_nn = ~np.isnan(univ_ib_cl_data) & ~np.isnan(univ_ib_vol_data)
            # Keep only rows where the trailing 27-day return was negative.
            univ_ib_nn = ~np.isnan(univ_ib_cl_data) & ~np.isnan(univ_ib_vol_data) & (univ_ib_rt1_data < 0)
            univ_ib_vol_data = univ_ib_vol_data[univ_ib_nn]
            univ_ib_cl_data_ = univ_ib_cl_data[univ_ib_nn]
            rho = ss.spearmanr(univ_ib_vol_data, univ_ib_cl_data_).correlation
            if k < 100:
                print(j + "_0" + str(k) + "D:\t", rho)
            else:
                print(j + "_" + str(k) + "D:\t", rho)
        # Blank separator between volatility estimators.
        print("\n")
def autocorr(x, m=1):
    """Lag-m autocorrelation of *x*: the mean product of the z-scored series
    and its z-scored m-step lag, computed over rows where both are non-NaN.

    NOTE(review): this name is redefined later in the file with a rank-based
    (smart_kendall) body; the later definition shadows this one at import time.
    """
    shifted = filt.lag(x, m)
    current = np.copy(x)
    shifted, current = reduce_nonnan(shifted, current)

    def _zscore(v):
        # Standardize to zero mean, unit standard deviation.
        return (v - np.mean(v)) / np.std(v)

    return np.mean(_zscore(shifted) * _zscore(current))
def spline_curve(x1, _ticker='SP500', f=None, x1_=None, x2_=None, scat=True):
    """Fit and plot a constrained cubic response curve of vol-normalized
    returns against a lagged, vol-normalized signal.

    Parameters
    ----------
    x1 : str
        Signal name suffix retrieved via ``cr_sig_mr_sg``.
    _ticker : str
        Ticker column to use (default 'SP500').
    f : unused here (kept for interface parity with ``response_curve``).
    x1_, x2_ : float or None
        Lower/upper knot locations; default to the signal's 25th/75th
        percentiles.
    scat : bool
        If True also scatter the raw (signal, return) points.
    """
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')[_ticker].values
    univ_ib_vl = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')[_ticker].values
    univ_ib_s1 = cr_sig_mr_sg.retrieve(univ_ib_eqidx_ext + x1)[_ticker].values
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')[_ticker].values
    # Restrict to exchange-open rows.
    z = list(np.where(univ_ib_gd.astype('int') == 1)[0])
    univ_ib_s1 = univ_ib_s1[z]
    univ_ib_cl = univ_ib_cl[z]
    univ_ib_vl = univ_ib_vl[z]
    # Vol-normalize returns and the one-day-lagged signal.
    univ_ib_cl = filt.ret(univ_ib_cl) / filt.lag(univ_ib_vl, 1)
    univ_ib_s1 = filt.lag(univ_ib_s1, 1) / filt.lag(univ_ib_vl, 1)
    univ_ib_cl, univ_ib_s1 = reduce_nonnan(univ_ib_cl, univ_ib_s1)
    print(spearmanr(univ_ib_cl, univ_ib_s1).correlation)
    # Equal observation weights for the fit.
    test_weight = np.ones(univ_ib_cl.shape[0]) / univ_ib_cl.shape[0]
    # NOTE: parameter `x1` (the signal name) is re-bound to the lower knot here.
    if x1_ is None:
        x1 = np.percentile(univ_ib_s1, 25)
    else:
        x1 = x1_
    if x2_ is None:
        x2 = np.percentile(univ_ib_s1, 75)
    else:
        x2 = x2_
    # Coefficients: b0..b3 cubic terms, a0/c0 tail adjustments beyond the knots
    # (presumably linear-tail constraints — confirm against rpf2.cubic_fit_linreg).
    testa = rpf2.cubic_fit_linreg(univ_ib_cl, univ_ib_s1, x1, x2, test_weight)
    x_ = np.linspace(-1, 1, num=101)
    b0 = testa[0]
    b1 = testa[1]
    b2 = testa[2]
    b3 = testa[3]
    a0 = testa[4]
    c0 = testa[5]
    # Evaluate the piecewise curve; divisions by x_ generate inf/nan at x_=0,
    # which is patched to b0 below.
    y_ = (b0+a0*(x_<x1)+c0*(x_>x2))+(b1-3*(a0/x_)*(x_<x1)-3*(c0/x_)*(x_>x2))*x_+\
        (b2+3*(a0/(x_**2))*(x_<x1)+3*(c0/(x_**2))*(x_>x2))*(x_**2)+\
        (b3-(a0/(x_**3))*(x_<x1)-(c0/(x_**3))*(x_>x2))*(x_**3)
    y_[np.where(np.isnan(y_))[0]] = b0
    z_ = np.zeros(len(y_))
    # Curve plus a zero reference line.
    pyl.plot(x_, y_)
    pyl.plot(x_, z_)
    if scat:
        pyl.scatter(univ_ib_s1, univ_ib_cl, c='c')
    return None
def test_measures():
    """For SP500, print Spearman correlations between daily returns and each
    stored signal variant at lags 1 and 2 (raw values and sign-only), over the
    last 2780 open-exchange observations.
    """
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')
    # Signal-name components: price input, transform, filter, horizon.
    _prc_names = ['CLS', 'A2D', 'A3D', 'A4D', 'A2W', 'A3W', 'A4W', 'A2M', 'A3M', 'A4M']
    _trn_names = ['LVL', 'CH0', 'CH1', 'CH2']
    _fil_names = ['LRB', 'QRB', 'QRG']
    # Horizons 005..300 in steps of 5, zero-padded to three characters.
    _hoz_names = []
    for j in range(5, 305, 5):
        if j < 10:
            _hoz_names.append('00'+str(j))
        elif j < 100:
            _hoz_names.append('0'+str(j))
        else:
            _hoz_names.append(str(j))
    _ticker = 'SP500'
    # Rows where the exchange was open.
    z = list(np.where(univ_ib_gd[_ticker].values.astype('int') == 1)[0])
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')[_ticker].values[z]
    univ_ib_cl = filt.ret(univ_ib_cl)
    univ_ib_cl = univ_ib_cl[-2780:]
    # _len = 99999
    for i in _prc_names:
        # i = _prc_names[0]
        for j in _fil_names:
            # j = _fil_names[0]
            for k in _trn_names:
                # k = _trn_names[0]
                for m in _hoz_names:
                    # m = _hoz_names[0]
                    fn = univ_ib_eqidx_ext+i+'_'+j+'_'+k+'_'+m
                    # print(fn)
                    univ_ib_sig = cr_sig_mr_sg.retrieve(fn)[_ticker].values[z]
                    # Signal at lags 1 and 2, aligned to the return window.
                    univ_ib_sig1 = filt.lag(univ_ib_sig)
                    univ_ib_sig2 = filt.lag(univ_ib_sig, 2)
                    univ_ib_sig1 = univ_ib_sig1[-2780:]
                    univ_ib_sig2 = univ_ib_sig2[-2780:]
                    t1 = spearmanr(univ_ib_cl, univ_ib_sig1).correlation
                    t2 = spearmanr(univ_ib_cl, univ_ib_sig2).correlation
                    # Direction-only correlations (sign agreement).
                    d1 = spearmanr(np.sign(univ_ib_cl), np.sign(univ_ib_sig1)).correlation
                    d2 = spearmanr(np.sign(univ_ib_cl), np.sign(univ_ib_sig2)).correlation
                    td = np.array([t1, t2, d1, d2])*100
                    print(i+','+j+','+k+','+m+' :\t', np_to_str(td))
    return None
def pred_stats_for_various_vol():
    """Print the Spearman correlation between daily returns and the lagged
    change in inverse volatility, for every smoothed volatility estimator and
    lookback (30..300 by 30).
    """
    univ_ib_cl = cr_cret.retrieve(univ_ib_ext + "Close")
    univ_ib_rt = univ_ib_cl.copy()
    for i in univ_ib_cl.tick_cols():
        univ_ib_cl[i] = filt.ret(univ_ib_cl[i].values)
    # Trailing 30-day return, lagged one day (computed but only used in the
    # commented-out filter below).
    for i in univ_ib_rt.tick_cols():
        univ_ib_rt[i] = filt.ret(univ_ib_rt[i].values, 30)
        univ_ib_rt[i] = filt.lag(univ_ib_rt[i].values)
    # Flatten all tickers into single 1-D vectors for a pooled correlation.
    univ_ib_cl_data = univ_ib_cl[univ_ib_cl.tick_cols()].values.reshape(-1)
    univ_ib_rt_data = univ_ib_rt[univ_ib_rt.tick_cols()].values.reshape(-1)
    # univ_ib_cl_data = np.abs(univ_ib_cl_data)
    import warnings
    # np.sign on NaNs raises RuntimeWarning; silence it temporarily.
    warnings.simplefilter("ignore", RuntimeWarning)
    univ_ib_rt_data = np.sign(univ_ib_rt_data)
    warnings.simplefilter("default", RuntimeWarning)
    for j in vol_names_sm:
        for k in range(30, 330, 30):
            # changed here
            # Lookbacks < 100 are zero-padded in the stored key name.
            if k < 100:
                univ_ib_vol = cr_vol_all.retrieve(univ_ib_ext + j + "_0" + str(k) + "D")
            else:
                univ_ib_vol = cr_vol_all.retrieve(univ_ib_ext + j + "_" + str(k) + "D")
            # Predictor: change in lagged inverse volatility.
            for i in univ_ib_vol.tick_cols():
                univ_ib_vol[i] = filt.chg(filt.lag(1 / univ_ib_vol[i].values))
            univ_ib_vol_data = univ_ib_vol[univ_ib_vol.tick_cols()].values.reshape(-1)
            # univ_ib_nn = ~np.isnan(univ_ib_cl_data) & ~np.isnan(univ_ib_vol_data)
            univ_ib_nn = ~np.isnan(univ_ib_cl_data) & ~np.isnan(univ_ib_vol_data)  # & (univ_ib_rt_data < 0)
            univ_ib_vol_data = univ_ib_vol_data[univ_ib_nn]
            univ_ib_cl_data_ = univ_ib_cl_data[univ_ib_nn]
            rho = ss.spearmanr(univ_ib_vol_data, univ_ib_cl_data_).correlation
            if k < 100:
                print(j + "_0" + str(k) + "D:\t", rho)
            else:
                print(j + "_" + str(k) + "D:\t", rho)
        # Blank separator between volatility estimators.
        print("\n")
def get_vol_adj():
    """For every volatility estimator and lookback, rescale each ticker's
    volatility by a rolling LAD beta against absolute returns, smooth it, and
    store the result under ``cr_vol_all_adj``.
    """
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')
    tick_cols = univ_ib_cl.tick_cols()
    for k in range(0, len(vol_list)):
        print('Processing volatility %s' % vol_names[k])
        for j in vol_lookbacks:
            print('Currently working on lookback %s' % str(j))
            # Lookbacks < 100 are zero-padded in the stored key name.
            if j < 100:
                univ_ib_vol = cr_vol_all.retrieve(univ_ib_eqidx_ext + vol_names[k] + '_0' + str(j))
            else:
                univ_ib_vol = cr_vol_all.retrieve(univ_ib_eqidx_ext + vol_names[k] + '_' + str(j))
            for i in tick_cols:
                # Only process tickers with at least one exchange-open row.
                z = np.where(univ_ib_gd[i].values.astype('int') == 1)[0]
                if z.shape[0] > 0:
                    univ_ib_vol_ = univ_ib_vol[i].values[z]
                    univ_ib_cl_ = univ_ib_cl[i].values[z]
                    # find the beta
                    # Regress absolute returns on the lagged volatility.
                    univ_ib_vol__ = filt.lag(univ_ib_vol_)
                    univ_ib_cl_ = np.abs(filt.ret(univ_ib_cl_))
                    regp = qreg4.roll_e_ladreg_1d(univ_ib_cl_, univ_ib_vol__, adj_lookback)
                    # get the beta-adjusted volatility
                    vol__ = univ_ib_vol_ * regp * _lapl_mult
                    # smooth out the volatility
                    vol__1 = filt.lrma(vol__, 61, lg=True)
                    vol__2 = filt.lrma(vol__, 7, lg=True)
                    # Slow baseline plus a smoothed fast-minus-slow correction.
                    vol__3 = vol__1 + filt.lrma(vol__2 - vol__1, 16)
                    # push the new volatility back
                    univ_ib_vol[i] = np.nan
                    univ_ib_vol[list(z), i] = vol__3
                    univ_ib_vol[i] = filt.fill(univ_ib_vol[i].values)
                else:
                    univ_ib_vol[i] = np.nan
            if j < 100:
                cr_vol_all_adj.store(univ_ib_eqidx_ext + vol_names[k] + '_0' + str(j), univ_ib_vol)
            else:
                cr_vol_all_adj.store(univ_ib_eqidx_ext + vol_names[k] + '_' + str(j), univ_ib_vol)
    return None
from CrazyCod.Utilities.smth_price_results import smth_param
import warnings
from CrazyCod.Utilities.frames import DataFrame
import matplotlib.pyplot as plt
import CrazyCod.Utilities.boosting as bst

# Script fragment: load SP500 volatility, returns and MovReg signals, lag the
# signals, and vol-normalize everything in preparation for a per-signal
# correlation scan (correl_vec is filled elsewhere).
i = 'SP500'
# pure momentum set of signals
testv1 = mkt_retrieve(i, 'Stats', 'Volatility')
for vv in ['vol_gk240']:
    # vv = 'vol_gk240'
    # lag the volatility
    testv = filt.lag(testv1[vv].values)
    test1 = mkt_retrieve(i, 'Stats', 'Returns')
    # lag the signal
    test2 = mkt_retrieve(i, 'MovReg', 'Signals')
    tcl2 = test2.tick_cols()
    # Lagged, vol-normalized signals.
    for k in tcl2:
        test2[k] = filt.lag(test2[k].values)/testv
    # get the average of returns
    test1_ = test1[['Date', 'Close']]
    test1_['Returns'] = filt.ret(test1_['Close'].values)
    # Vol-normalized forward returns.
    fret1 = test1_['Returns'].values / testv
    num_col = len(tcl2)
    # One correlation slot per signal column.
    correl_vec = np.zeros(num_col)
# Script fragment: compare volatility as a predictor of absolute returns for
# SP500 — raw (resid0), with a rolling LAD beta (resid1), and with a 2-factor
# LAD regression on (vol, sqrt-vol) (resid2).
lookbacks = ['120', '240', '360']
np_nice_options(linelen=250, numpres=8)
# Scale factors: E|X| for a normal is sqrt(2/pi)*sigma; Laplace analogue 1/sqrt(2).
_norm_mult = np.sqrt(2/np.pi)
_lapl_mult = 1/np.sqrt(2)
_rand_mult = 0.60
tickers = ['SP500', 'DAX', 'Nikkei225', 'ESTX50', 'SMI', 'RDX', 'MSCIEM']
i = 'SP500'
univ_ib_cl = mkt_retrieve(i, 'Stats', 'Returns')['Close'].values
univ_ib_vl = mkt_retrieve(i, 'Stats', 'Volatility')['vol_pb240'].values
univ_ib_cl = np.abs(filt.ret(univ_ib_cl))
univ_ib_vl = filt.lag(univ_ib_vl)
univ_ib_cl, univ_ib_vl = reduce_nonnan(univ_ib_cl, univ_ib_vl)
univ_ib_vl *= _lapl_mult
# Second regressor: sqrt of vol, rescaled to match vol's median abs deviation.
univ_ib_vl2 = np.sqrt(univ_ib_vl)
univ_ib_vl2 = univ_ib_vl2 * med_abs_dev(univ_ib_vl)/med_abs_dev(univ_ib_vl2)
b41 = qreg.roll_e_ladreg_1d(univ_ib_cl, univ_ib_vl, 240)
b51 = qreg.roll_e_ladreg_2d(univ_ib_cl, mcc(univ_ib_vl, univ_ib_vl2), 240)
# Residuals; betas are lagged before use so fits are out-of-sample.
resid0 = univ_ib_cl - univ_ib_vl
resid1 = univ_ib_cl - univ_ib_vl * filt.lag(b41)
resid2 = univ_ib_cl - univ_ib_vl * filt.lag(np.ascontiguousarray(b51[:, 0])) - univ_ib_vl2 * filt.lag(np.ascontiguousarray(b51[:, 1]))
def autocorr(x, m=1):
    """Rank-based lag-m autocorrelation of *x*, measured with smart_kendall
    over the rows where both the series and its m-step lag are non-NaN.

    NOTE(review): this redefines the earlier z-score `autocorr`; this later
    definition is the one that wins at import time.
    """
    shifted, current = reduce_nonnan(filt.lag(x, m), np.copy(x))
    return smart_kendall(shifted, current)
def test_measures2():
    """For each transform, build a (horizon x price/filter-combination) table
    of pooled Spearman correlations between 2-day-lagged signals and returns
    across a fixed set of index tickers, and display it.
    """
    pd.set_option('display.max_columns', 30)
    pd.set_option('display.max_rows', 100)
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')
    # Signal-name components: price input, transform, filter, horizon.
    _prc_names = ['CLS', 'A2D', 'A3D', 'A4D', 'A2W', 'A3W', 'A4W', 'A2M', 'A3M', 'A4M']
    _trn_names = ['LVL', 'CH0', 'CH1', 'CH2']
    _fil_names = ['LRB', 'QRB', 'QRG']
    # Horizons 005..300 in steps of 5, zero-padded to three characters.
    _hoz_names = []
    for j in range(5, 305, 5):
        if j < 10:
            _hoz_names.append('00'+str(j))
        elif j < 100:
            _hoz_names.append('0'+str(j))
        else:
            _hoz_names.append(str(j))
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')
    tick_cols = univ_ib_cl.tick_cols()
    # Convert closes to returns on exchange-open rows; zero-fill elsewhere.
    for n in tick_cols:
        z = list(np.where(univ_ib_gd[n].values.astype('int') == 1)[0])
        univ_ib_cl_ = filt.ret(univ_ib_cl[n].values[z])
        univ_ib_cl[n] = np.nan
        univ_ib_cl[z, n] = univ_ib_cl_
        univ_ib_cl[n] = filt.fill1(univ_ib_cl[n].values, 0)
    # tick_cols_ = tick_cols[0:1]
    # Hand-picked tickers with per-ticker start offsets (siz_) to skip early history.
    tick_cols = ['SP500', 'SP400Mid', 'Nikkei225', 'R2000', 'FTSE100', 'DAX', 'SMI', 'CAC40',
                 'AEX', 'MIBFTSE', 'IBEX35', 'OMXS30']
    siz_ = [5330, 7115, 6757, 7349, 5324, 6785, 6794, 6199, 7232, 7936, 7180, 7295]
    # siz = [2784, 999, 1357, 765, 2790, 1329, 1320, 1915, 882, 178, 934, 819]
    # Drop the first ticker (SP500) from this run.
    tick_cols = tick_cols[1:]
    siz_ = siz_[1:]
    for k in _trn_names:
        # One result column per (price, filter) pair: 10 * 3 = 30 columns.
        dummy_cols = []
        for i in _prc_names:
            for j in _fil_names:
                dummy_cols.append(i+'_'+j)
        # NaN-filled placeholder columns, one row per horizon.
        x_ = np.random.random(len(_hoz_names))*np.nan
        y_ = np.random.random(len(_hoz_names))*np.nan
        dummy_df = DataFrame({dummy_cols[0]: x_, dummy_cols[1]: y_})
        for i in range(2, 30):
            dummy_df[dummy_cols[i]] = x_
        _col = 0
        for i in _prc_names:
            # j = _fil_names[0]
            for j in _fil_names:
                # k = _trn_names[0]
                for mi, m in enumerate(_hoz_names):
                    # m = _hoz_names[0]
                    fn = univ_ib_eqidx_ext+i+'_'+j+'_'+k+'_'+m
                    univ_ib_sig = cr_sig_mr_sg.retrieve(fn)
                    # Pool lagged signal / return pairs across all tickers.
                    ret_, sig_ = np.empty(0), np.empty(0)
                    for ni, n in enumerate(tick_cols):
                        # ni = 0
                        # n = tick_cols[ni]
                        z = list(np.where(univ_ib_gd[n].values.astype('int') == 1)[0])
                        z = [z_ for z_ in z if z_ > siz_[ni]]  # 5 is just a buffer
                        sig__ = univ_ib_sig[n].values[z]
                        ret__ = univ_ib_cl[n].values[z]
                        # Two-day lag; drop the first two (NaN) entries.
                        sig__ = filt.lag(sig__, 2)
                        ret_ = np.hstack((ret_, ret__[2:]))
                        sig_ = np.hstack((sig_, sig__[2:]))
                    gidx = np.where(~np.isnan(ret_) & ~np.isnan(sig_))[0]
                    dummy_df[mi, dummy_cols[_col]] = 100*spearmanr(ret_[gidx], sig_[gidx]).correlation
                _col += 1
        dummy_df.show_all()
# Script fragment: align one ticker's returns and eight volatility series
# (univ_ib_vol0..univ_ib_vol6 are loaded earlier, outside this view) to
# exchange-open rows, lag the volatilities, and extract raw arrays.
univ_ib_vol7 = cr_vol_all.retrieve(univ_ib_ext+'vol_yz_'+lookback)[['Date', ticker]]
z = list(np.where(univ_ib_gd[ticker].values.astype('int') == 1)[0])
# Keep only exchange-open rows in every frame.
univ_ib_cl = univ_ib_cl[z, :]
univ_ib_vol0 = univ_ib_vol0[z, :]
univ_ib_vol1 = univ_ib_vol1[z, :]
univ_ib_vol2 = univ_ib_vol2[z, :]
univ_ib_vol3 = univ_ib_vol3[z, :]
univ_ib_vol4 = univ_ib_vol4[z, :]
univ_ib_vol5 = univ_ib_vol5[z, :]
univ_ib_vol6 = univ_ib_vol6[z, :]
univ_ib_vol7 = univ_ib_vol7[z, :]
# Absolute returns vs. one-day-lagged volatility estimates.
univ_ib_cl[ticker] = np.abs(filt.ret(univ_ib_cl[ticker].values))
univ_ib_vol0[ticker] = filt.lag(univ_ib_vol0[ticker].values)
univ_ib_vol1[ticker] = filt.lag(univ_ib_vol1[ticker].values)
univ_ib_vol2[ticker] = filt.lag(univ_ib_vol2[ticker].values)
univ_ib_vol3[ticker] = filt.lag(univ_ib_vol3[ticker].values)
univ_ib_vol4[ticker] = filt.lag(univ_ib_vol4[ticker].values)
univ_ib_vol5[ticker] = filt.lag(univ_ib_vol5[ticker].values)
univ_ib_vol6[ticker] = filt.lag(univ_ib_vol6[ticker].values)
univ_ib_vol7[ticker] = filt.lag(univ_ib_vol7[ticker].values)
# Pull out plain arrays for downstream use.
univ_ib_dt_ = univ_ib_cl['Date'].values
univ_ib_cl_ = univ_ib_cl[ticker].values
univ_ib_vol0_ = univ_ib_vol0[ticker].values
univ_ib_vol1_ = univ_ib_vol1[ticker].values
univ_ib_vol2_ = univ_ib_vol2[ticker].values
univ_ib_vol3_ = univ_ib_vol3[ticker].values
univ_ib_vol4_ = univ_ib_vol4[ticker].values
def check_closest_volatility():
    """For each volatility estimator/lookback, fit median (q=0.5) quantile
    regressions of absolute returns on lagged volatility — unconditionally and
    conditioned on the sign of trailing 30/60/120-day returns — and print the
    slope estimates side by side.
    """
    univ_ib_cl = cr_cret.retrieve(univ_ib_ext + "Close")
    # Copies for trailing returns over three horizons.
    univ_ib_rt1 = univ_ib_cl.copy()
    univ_ib_rt2 = univ_ib_cl.copy()
    univ_ib_rt3 = univ_ib_cl.copy()
    for i in univ_ib_cl.tick_cols():
        univ_ib_cl[i] = filt.ret(univ_ib_cl[i].values)
        # Trailing returns, lagged one day so they only use prior data.
        univ_ib_rt1[i] = filt.ret(univ_ib_rt1[i].values, 30)
        univ_ib_rt1[i] = filt.lag(univ_ib_rt1[i].values)
        univ_ib_rt2[i] = filt.ret(univ_ib_rt2[i].values, 60)
        univ_ib_rt2[i] = filt.lag(univ_ib_rt2[i].values)
        univ_ib_rt3[i] = filt.ret(univ_ib_rt3[i].values, 120)
        univ_ib_rt3[i] = filt.lag(univ_ib_rt3[i].values)
    # Flatten all tickers into pooled 1-D vectors.
    univ_ib_cl_data = univ_ib_cl[univ_ib_cl.tick_cols()].values.reshape(-1)
    univ_ib_rt1_data = univ_ib_rt1[univ_ib_rt1.tick_cols()].values.reshape(-1)
    univ_ib_rt2_data = univ_ib_rt2[univ_ib_rt2.tick_cols()].values.reshape(-1)
    univ_ib_rt3_data = univ_ib_rt3[univ_ib_rt3.tick_cols()].values.reshape(-1)
    univ_ib_cl_data = np.abs(univ_ib_cl_data)
    import warnings
    # np.sign on NaNs raises RuntimeWarning; silence it temporarily.
    warnings.simplefilter("ignore", RuntimeWarning)
    univ_ib_rt1_data = np.sign(univ_ib_rt1_data)
    univ_ib_rt2_data = np.sign(univ_ib_rt2_data)
    univ_ib_rt3_data = np.sign(univ_ib_rt3_data)
    warnings.simplefilter("default", RuntimeWarning)
    # vol_names_ = [i for i in vol_names if 'reg' not in i]
    for j in vol_names_sm:
        for k in range(_min_range, _max_range, _step_range * 3):
            # Lookbacks < 100 are zero-padded in the stored key name.
            if k < 100:
                univ_ib_vol = cr_vol_all.retrieve(univ_ib_ext + j + "_0" + str(k) + "D")
            else:
                univ_ib_vol = cr_vol_all.retrieve(univ_ib_ext + j + "_" + str(k) + "D")
            for i in univ_ib_vol.tick_cols():
                univ_ib_vol[i] = filt.lag(univ_ib_vol[i].values)
            univ_ib_vol_data = univ_ib_vol[univ_ib_vol.tick_cols()].values.reshape(-1)
            # res0: unconditional median regression slope.
            univ_ib_nn = ~np.isnan(univ_ib_cl_data) & ~np.isnan(univ_ib_vol_data)
            univ_ib_vol_data_ = univ_ib_vol_data[univ_ib_nn] * _norm_mult
            univ_ib_cl_data_ = univ_ib_cl_data[univ_ib_nn]
            res0 = QuantReg(univ_ib_cl_data_, univ_ib_vol_data_).fit(q=0.5).params
            # res1..res3: conditioned on negative trailing 30/60/120-day returns.
            univ_ib_nn = ~np.isnan(univ_ib_cl_data) & ~np.isnan(univ_ib_vol_data) & (univ_ib_rt1_data < 0)
            univ_ib_vol_data_ = univ_ib_vol_data[univ_ib_nn] * _norm_mult
            univ_ib_cl_data_ = univ_ib_cl_data[univ_ib_nn]
            res1 = QuantReg(univ_ib_cl_data_, univ_ib_vol_data_).fit(q=0.5).params
            univ_ib_nn = ~np.isnan(univ_ib_cl_data) & ~np.isnan(univ_ib_vol_data) & (univ_ib_rt2_data < 0)
            univ_ib_vol_data_ = univ_ib_vol_data[univ_ib_nn] * _norm_mult
            univ_ib_cl_data_ = univ_ib_cl_data[univ_ib_nn]
            res2 = QuantReg(univ_ib_cl_data_, univ_ib_vol_data_).fit(q=0.5).params
            univ_ib_nn = ~np.isnan(univ_ib_cl_data) & ~np.isnan(univ_ib_vol_data) & (univ_ib_rt3_data < 0)
            univ_ib_vol_data_ = univ_ib_vol_data[univ_ib_nn] * _norm_mult
            univ_ib_cl_data_ = univ_ib_cl_data[univ_ib_nn]
            res3 = QuantReg(univ_ib_cl_data_, univ_ib_vol_data_).fit(q=0.5).params
            # res4..res6: conditioned on positive trailing 30/60/120-day returns.
            univ_ib_nn = ~np.isnan(univ_ib_cl_data) & ~np.isnan(univ_ib_vol_data) & (univ_ib_rt1_data > 0)
            univ_ib_vol_data_ = univ_ib_vol_data[univ_ib_nn] * _norm_mult
            univ_ib_cl_data_ = univ_ib_cl_data[univ_ib_nn]
            res4 = QuantReg(univ_ib_cl_data_, univ_ib_vol_data_).fit(q=0.5).params
            univ_ib_nn = ~np.isnan(univ_ib_cl_data) & ~np.isnan(univ_ib_vol_data) & (univ_ib_rt2_data > 0)
            univ_ib_vol_data_ = univ_ib_vol_data[univ_ib_nn] * _norm_mult
            univ_ib_cl_data_ = univ_ib_cl_data[univ_ib_nn]
            res5 = QuantReg(univ_ib_cl_data_, univ_ib_vol_data_).fit(q=0.5).params
            univ_ib_nn = ~np.isnan(univ_ib_cl_data) & ~np.isnan(univ_ib_vol_data) & (univ_ib_rt3_data > 0)
            univ_ib_vol_data_ = univ_ib_vol_data[univ_ib_nn] * _norm_mult
            univ_ib_cl_data_ = univ_ib_cl_data[univ_ib_nn]
            res6 = QuantReg(univ_ib_cl_data_, univ_ib_vol_data_).fit(q=0.5).params
            if k < 100:
                print(
                    j + "_0" + str(k) + "\t",
                    np_to_str(res0), "\t",
                    np_to_str(res1), "\t",
                    np_to_str(res2), "\t",
                    np_to_str(res3), "\t",
                    np_to_str(res4), "\t",
                    np_to_str(res5), "\t",
                    np_to_str(res6), "\t",
                )
            else:
                print(
                    j + "_" + str(k) + "\t",
                    np_to_str(res0), "\t",
                    np_to_str(res1), "\t",
                    np_to_str(res2), "\t",
                    np_to_str(res3), "\t",
                    np_to_str(res4), "\t",
                    np_to_str(res5), "\t",
                    np_to_str(res6), "\t",
                )
        # Blank separator between volatility estimators.
        print("\n")
def get_smart_measures():
    """Greedy forward selection of up to 20 stored signals (raw and
    vol-normalized tracks in parallel): at each step pick the signal with the
    largest |smart_kendall| against the current residual, add its beta-scaled
    contribution, and print progress.

    Statistics are computed on rows 5220 onward (presumably to skip early
    history — confirm against the stored 'GBM' frame).
    """
    univ_ib_data = cr_cret.retrieve(univ_ib_eqidx_ext + 'GBM')
    univ_ib_cl = univ_ib_data['Close'].values
    univ_ib_vl = univ_ib_data['Volatility'].values
    del univ_ib_data['Close']
    del univ_ib_data['Volatility']
    new_col_names = list(univ_ib_data.columns)
    # Lag all predictors (and the volatility) by one day.
    for i in new_col_names:
        univ_ib_data[i] = filt.lag(univ_ib_data[i].values)
    univ_ib_vl = filt.lag(univ_ib_vl)
    # Vol-normalized track: returns and signals divided by lagged volatility.
    univ_ib_clv = univ_ib_cl/univ_ib_vl
    univ_ib_datav = univ_ib_data.copy()
    for i in new_col_names:
        univ_ib_datav[i] = univ_ib_datav[i].values/univ_ib_vl
    # decrease the magnitude of QRG signals for stability
    for i in new_col_names:
        if 'QRG' in i:
            univ_ib_data[i] = univ_ib_data[i].values/1000
            univ_ib_datav[i] = univ_ib_datav[i].values/1000
    # find the first measure
    meas = []        # selected column names, raw track
    measv = []       # selected column names, vol-normalized track
    _collect = []    # running fitted combination, raw track
    _collectv = []   # running fitted combination, vol-normalized track
    for i in range(0, 20):
        # i = 2
        if i == 0:
            # First pass: correlate every candidate directly with the target.
            _maxtest = 0.0
            _maxidx = 99999
            _maxtestv = 0.0
            _maxidxv = 99999
            for ji, j in enumerate(new_col_names):
                _tmp_val = univ_ib_data[j].values
                _test = smart_kendall(_tmp_val[5220:], univ_ib_cl[5220:])
                if abs(_test) > abs(_maxtest):
                    _maxtest = _test
                    _maxidx = ji
                _tmp_valv = univ_ib_datav[j].values
                _testv = smart_kendall(_tmp_valv[5220:], univ_ib_clv[5220:])
                if abs(_testv) > abs(_maxtestv):
                    _maxtestv = _testv
                    _maxidxv = ji
            meas.append(new_col_names[_maxidx])
            measv.append(new_col_names[_maxidxv])
            # minimize the kendall between variable, and error
            _sval = univ_ib_data[new_col_names[_maxidx]].values
            _svalv = univ_ib_datav[new_col_names[_maxidxv]].values
            _beta = find_more_accurate_beta(univ_ib_cl[5220:], _sval[5220:])
            _collect = _beta * _sval
            _err = univ_ib_cl - _collect
            _betav = find_more_accurate_beta(univ_ib_clv[5220:], _svalv[5220:])
            _collectv = _betav * _svalv
            _errv = univ_ib_clv - _collectv
        else:
            # Subsequent passes: correlate candidates with the current residual.
            _maxtest = 0.0
            _maxidx = 99999
            _maxtestv = 0.0
            _maxidxv = 99999
            for ji, j in enumerate(new_col_names):
                if j not in meas:
                    _tmp_val = univ_ib_data[j].values
                    _test = smart_kendall(_tmp_val[5220:], _err[5220:])
                    if abs(_test) > abs(_maxtest):
                        _maxtest = _test
                        _maxidx = ji
                if j not in measv:
                    _tmp_valv = univ_ib_datav[j].values
                    _testv = smart_kendall(_tmp_valv[5220:], _errv[5220:])
                    if abs(_testv) > abs(_maxtestv):
                        _maxtestv = _testv
                        _maxidxv = ji
            meas.append(new_col_names[_maxidx])
            measv.append(new_col_names[_maxidxv])
            # minimize the kendall between variable, and error
            _sval = univ_ib_data[new_col_names[_maxidx]].values
            _svalv = univ_ib_datav[new_col_names[_maxidxv]].values
            # Beta is fit against the residual, then added to the combination.
            _beta = find_more_accurate_beta(_err[5220:], _sval[5220:])
            _collect += _beta * _sval
            _err = univ_ib_cl - _collect
            _betav = find_more_accurate_beta(_errv[5220:], _svalv[5220:])
            _collectv += _betav * _svalv
            _errv = univ_ib_clv - _collectv
        # Progress: step, selections so far, fit quality and residual spread.
        print(i)
        print(meas)
        print(measv)
        print(smart_kendall(_collect[5220:], univ_ib_cl[5220:]), np.std(univ_ib_cl[5220:]-_collect[5220:]))
        print(smart_kendall(_collectv[5220:], univ_ib_clv[5220:]), np.std(univ_ib_clv[5220:]-_collectv[5220:]))
test1 = mkt_retrieve(i, 'Stats', 'Returns') for j in rlbck: # j = rlbck[0] test2 = mkt_retrieve(i, 'MovReg', 'Signals_Pct_' + str(j)) tcl2 = test2.tick_cols() for lmb in [100]: # lmb = 0.01 print('Using lookback of %s with lambda of %s' % (str(j), str(lmb))) for k in range(2, 3): # k = 1 # lag the signal data test2_ = test2.copy() for tcl2_ in tcl2: test2_[tcl2_] = filt.lag(test2_[tcl2_].values, k) # get the average of returns test1_ = test1[['Date', 'Close']] test1_['Returns'] = filt.ret(test1_['Close'].values) test1_['MultiReturns'] = filt.ret(test1_['Close'].values, k)/k test1_['MultiReturns'] = filt.mpc(test1_['MultiReturns'].values, j) test1_['ReturnPct'] = filt.mpc(test1_['Returns'].values, j) # get beta # beta1 = qreg5.roll_s_ladreg_2d_l2n(test1_['MultiReturns'].values, # np.ascontiguousarray(test2_[test2_.tick_cols()].values), # j, lmb, 30) beta2 = qreg5.roll_w_ladreg_2d_l2n(test1_['MultiReturns'].values,
def get_smart_measures():
    """Variant of the earlier greedy selection (this redefinition shadows it):
    candidates exclude monthly/weekly columns, and each step scores the FULL
    updated combination (current fit plus the beta-scaled candidate) with
    smart_kendall rather than the candidate alone.

    Statistics use rows 5220 onward — presumably skipping early history;
    confirm against the stored 'GBM' frame.
    """
    univ_ib_data = cr_cret.retrieve(univ_ib_eqidx_ext + 'GBM')
    univ_ib_cl = univ_ib_data['Close'].values
    univ_ib_vl = univ_ib_data['Volatility'].values
    del univ_ib_data['Close']
    del univ_ib_data['Volatility']
    new_col_names = list(univ_ib_data.columns)
    # Lag all predictors (and the volatility) by one day.
    for i in new_col_names:
        univ_ib_data[i] = filt.lag(univ_ib_data[i].values)
    univ_ib_vl = filt.lag(univ_ib_vl)
    # Vol-normalized track.
    univ_ib_clv = univ_ib_cl/univ_ib_vl
    univ_ib_datav = univ_ib_data.copy()
    for i in new_col_names:
        univ_ib_datav[i] = univ_ib_datav[i].values/univ_ib_vl
    # decrease the magnitude of QRG signals for stability
    for i in new_col_names:
        if 'QRG' in i:
            univ_ib_data[i] = univ_ib_data[i].values/1000
            univ_ib_datav[i] = univ_ib_datav[i].values/1000
    # _new_col_names = new_col_names
    # type 1
    # Exclude monthly ('M_') and weekly ('W_') aggregation columns.
    _new_col_names = [i for i in new_col_names if ('M_' not in i) and ('W_' not in i)]
    # find the measures
    meas = []        # selected column names, raw track
    measv = []       # selected column names, vol-normalized track
    _collect = []    # running fitted combination, raw track
    _collectv = []   # running fitted combination, vol-normalized track
    for i in range(0, 20):
        # i = 0
        if i == 0:
            _maxtest = 0.0
            _maxidx = 99999
            _maxtestv = 0.0
            _maxidxv = 99999
            _testcol_ = None
            _testcolv_ = None
            for ji, j in enumerate(_new_col_names):
                # Score each candidate's beta-scaled contribution directly.
                _tmp_val = univ_ib_data[j].values
                _testcol = find_more_accurate_beta(univ_ib_cl[5220:], _tmp_val[5220:]) * _tmp_val
                _test = smart_kendall(univ_ib_cl[5220:], _testcol[5220:])
                if abs(_test) > abs(_maxtest):
                    _maxtest = _test
                    _maxidx = ji
                    _testcol_ = _testcol
                _tmp_valv = univ_ib_datav[j].values
                _testcolv = find_more_accurate_beta(univ_ib_clv[5220:], _tmp_valv[5220:]) * _tmp_valv
                _testv = smart_kendall(univ_ib_clv[5220:], _testcolv[5220:])
                if abs(_testv) > abs(_maxtestv):
                    _maxtestv = _testv
                    _maxidxv = ji
                    _testcolv_ = _testcolv
            meas.append(_new_col_names[_maxidx])
            measv.append(_new_col_names[_maxidxv])
            _collect = _testcol_
            _collectv = _testcolv_
        else:
            _maxtest = 0.0
            _maxidx = 99999
            _maxtestv = 0.0
            _maxidxv = 99999
            _testcol_ = None
            _testcolv_ = None
            for ji, j in enumerate(_new_col_names):
                if j not in meas:
                    # Beta fit on the residual; score the combined fit.
                    _tmp_val = univ_ib_data[j].values
                    _testcol = find_more_accurate_beta(univ_ib_cl[5220:]-_collect[5220:], _tmp_val[5220:]) * _tmp_val
                    _test = smart_kendall(univ_ib_cl[5220:], _collect[5220:]+_testcol[5220:])
                    if abs(_test) > abs(_maxtest):
                        _maxtest = _test
                        _maxidx = ji
                        _testcol_ = _testcol
                if j not in measv:
                    _tmp_valv = univ_ib_datav[j].values
                    _testcolv = find_more_accurate_beta(univ_ib_clv[5220:]-_collectv[5220:], _tmp_valv[5220:]) * _tmp_valv
                    _testv = smart_kendall(univ_ib_clv[5220:], _collectv[5220:]+_testcolv[5220:])
                    if abs(_testv) > abs(_maxtestv):
                        _maxtestv = _testv
                        _maxidxv = ji
                        _testcolv_ = _testcolv
            meas.append(_new_col_names[_maxidx])
            measv.append(_new_col_names[_maxidxv])
            _collect += _testcol_
            _collectv += _testcolv_
        # Progress: step, selections so far, fit quality.
        print(i)
        print(meas)
        print(measv)
        print(smart_kendall(_collect[5220:], univ_ib_cl[5220:]))
        print(smart_kendall(_collectv[5220:], univ_ib_clv[5220:]))
def get_cs_factor_portfolios():
    """Build and plot cross-sectional factor portfolios from one stored
    signal: rank-normalize it cross-sectionally (half- and full-normal
    rankits), optionally orthogonalize against one or two PCA beta vectors,
    risk-parity weight, and back-test six variants (H1-H3, F1-F3).

    NOTE(review): the trailing plotting section references bk_test1/2/3,
    which are not defined in this function — this block appears to be
    interactive scratch code and will raise NameError if run as-is.
    """
    # just testing here
    # read the closing prices, convert it to returns
    _cls = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')
    tcl = _cls.tick_cols()
    for j in tcl:
        _cls[j] = filt.ret(_cls[j].values)
    # read the volatility, signal, pca beta, lag them
    _vol = cr_cret.retrieve(univ_ib_eqidx_ext + 'vol_gk240')
    _sig = cr_sret.retrieve(univ_ib_eqidx_ext + 'D10S26_521_QRB_LVL')
    # _sig = cr_sret.retrieve(univ_ib_eqidx_ext + _all_signals_p1_55[2])
    _pca1 = cr_cret.retrieve(univ_ib_eqidx_ext + 'PCA1_Beta')
    _pca2 = cr_cret.retrieve(univ_ib_eqidx_ext + 'PCA2_Beta')
    for j in tcl:
        _sig[j] = filt.lag(_sig[j].values)
        _pca1[j] = filt.lag(_pca1[j].values)
        _pca2[j] = filt.lag(_pca2[j].values)
        _vol[j] = filt.lag(_vol[j].values)
    # find starting point, at least 20 markets are live (from both beta, sig)
    nc = len(tcl)
    ny = np.zeros(nc)
    for j in range(0, nc):
        # First valid index per ticker, needing both signal and PCA beta.
        ny[j] = np.maximum(filt.fst_nan(_sig[tcl[j]].values), filt.fst_nan(_pca1[tcl[j]].values))
    _ny_mn = int(np.min(ny))
    _ny_mx = int(np.max(ny))
    _ny = _ny_mn
    for j in range(_ny_mn, _ny_mx):
        if np.where(ny >= j)[0].shape[0] >= 20:
            _ny = j
            break
    # cross-sectionally normalize the signal
    # Six parallel tracks: 1-3 half-normal rankit, 4-6 full-normal rankit;
    # 2/5 get 1-factor and 3/6 get 2-factor orthogonalization below.
    _sign1 = _sig.copy()
    _sign2 = _sig.copy()
    _sign3 = _sig.copy()
    _sign4 = _sig.copy()
    _sign5 = _sig.copy()
    _sign6 = _sig.copy()
    for j in tcl:
        _sign1[j] = np.nan
        _sign2[j] = np.nan
        _sign3[j] = np.nan
        _sign4[j] = np.nan
        _sign5[j] = np.nan
        _sign6[j] = np.nan
    for j in range(_ny, _sig.shape[0]):
        # Vol-scaled cross-section for day j.
        _tmp = _sig[j, tcl] / _vol[j, tcl]
        _sign1[j, tcl] = half_norm_rankit(_tmp)
        _sign2[j, tcl] = _sign1[j, tcl].values
        _sign3[j, tcl] = _sign1[j, tcl].values
        _sign4[j, tcl] = full_norm_rankit(_tmp)
        _sign5[j, tcl] = _sign4[j, tcl].values
        _sign6[j, tcl] = _sign4[j, tcl].values
    # calculate returns using risk parity portfolio approaches
    bk_test = dummy_df(_vol)
    bk_test['H1'] = np.nan
    bk_test['H2'] = np.nan
    bk_test['H3'] = np.nan
    bk_test['F1'] = np.nan
    bk_test['F2'] = np.nan
    bk_test['F3'] = np.nan
    for j in range(_ny, _sig.shape[0]):
        # j = _ny
        # Day-j cross-sections (custom DataFrame row slice — presumably
        # inclusive `j:j` single-row semantics; confirm against frames.py).
        _sign1_tmp = _sign1[j:j, tcl].values
        _sign2_tmp = _sign2[j:j, tcl].values
        _sign3_tmp = _sign3[j:j, tcl].values
        _sign4_tmp = _sign4[j:j, tcl].values
        _sign5_tmp = _sign5[j:j, tcl].values
        _sign6_tmp = _sign6[j:j, tcl].values
        _cls_tmp = _cls[j:j, tcl].values
        _vol_tmp = _vol[j:j, tcl].values
        _pca1_tmp = _pca1[j:j, tcl].values
        _pca2_tmp = _pca2[j:j, tcl].values
        _sign1_tmp, _sign2_tmp, _sign3_tmp, _sign4_tmp, _sign5_tmp, _sign6_tmp, _cls_tmp, _vol_tmp, _pca1_tmp,\
            _pca2_tmp = reduce_nonnan(_sign1_tmp, _sign2_tmp, _sign3_tmp, _sign4_tmp, _sign5_tmp, _sign6_tmp,
                                      _cls_tmp, _vol_tmp, _pca1_tmp, _pca2_tmp)
        # Projection coefficients onto the PCA beta vectors.
        _tmp11 = np.dot(_pca1_tmp, _pca1_tmp)
        _tmp22 = np.dot(_pca2_tmp, _pca2_tmp)
        _tmps11 = np.dot(_sign1_tmp, _pca1_tmp)
        _tmps12 = np.dot(_sign1_tmp, _pca2_tmp)
        _tmps21 = np.dot(_sign4_tmp, _pca1_tmp)
        _tmps22 = np.dot(_sign4_tmp, _pca2_tmp)
        # Remove PCA1 (tracks 2/5) and PCA1+PCA2 (tracks 3/6) exposure.
        _sign2_tmp = _sign2_tmp - _pca1_tmp * _tmps11 / _tmp11
        _sign5_tmp = _sign5_tmp - _pca1_tmp * _tmps21 / _tmp11
        _sign3_tmp = _sign3_tmp - _pca1_tmp * _tmps11 / _tmp11 - _pca2_tmp * _tmps12 / _tmp22
        _sign6_tmp = _sign6_tmp - _pca1_tmp * _tmps21 / _tmp11 - _pca2_tmp * _tmps22 / _tmp22
        # Normalize to unit gross exposure.
        _sign1_tmp_sum = np.sum(np.abs(_sign1_tmp))
        _sign2_tmp_sum = np.sum(np.abs(_sign2_tmp))
        _sign3_tmp_sum = np.sum(np.abs(_sign3_tmp))
        _sign4_tmp_sum = np.sum(np.abs(_sign4_tmp))
        _sign5_tmp_sum = np.sum(np.abs(_sign5_tmp))
        _sign6_tmp_sum = np.sum(np.abs(_sign6_tmp))
        _sign1_tmp = _sign1_tmp / _sign1_tmp_sum
        _sign2_tmp = _sign2_tmp / _sign2_tmp_sum
        _sign3_tmp = _sign3_tmp / _sign3_tmp_sum
        _sign4_tmp = _sign4_tmp / _sign4_tmp_sum
        _sign5_tmp = _sign5_tmp / _sign5_tmp_sum
        _sign6_tmp = _sign6_tmp / _sign6_tmp_sum
        # Risk-parity leverage: target 0.005 per unit of volatility.
        _lev_tmp = 0.005 / _vol_tmp
        _ret_tmp = _cls_tmp * _lev_tmp
        bk_test[j, 'H1'] = np.dot(_sign1_tmp, _ret_tmp)
        bk_test[j, 'H2'] = np.dot(_sign2_tmp, _ret_tmp)
        bk_test[j, 'H3'] = np.dot(_sign3_tmp, _ret_tmp)
        bk_test[j, 'F1'] = np.dot(_sign4_tmp, _ret_tmp)
        bk_test[j, 'F2'] = np.dot(_sign5_tmp, _ret_tmp)
        bk_test[j, 'F3'] = np.dot(_sign6_tmp, _ret_tmp)
    print('done')
    _dt = bk_test['Date'].values[_ny:]
    # Plot the half-normal track portfolios with annualized Sharpe (x16,
    # presumably sqrt(256) trading days) and drawdown ratio; flip sign if
    # the strategy is net negative.
    f1 = plt.figure(1)
    for jidx, j in enumerate(['H1', 'H2', 'H3']):
        _sh = np.nanmean(bk_test[j].values)*16/np.nanstd(bk_test[j].values)
        if _sh > 0:
            _mx = avg_drawdown(bk_test[_ny:, j].values) / (16 * np.std(bk_test[_ny:, j].values))
            testh = conv_to_price(bk_test[j].values)
        else:
            _mx = avg_drawdown(-bk_test[_ny:, j].values) / (16 * np.std(bk_test[_ny:, j].values))
            testh = conv_to_price(-bk_test[j].values)
            _sh = -_sh
        print([_sh, _mx])
        plt.subplot(3, 1, jidx+1)
        plot_ts_new(_dt, testh[_ny:])
    # Same for the full-normal track portfolios.
    f2 = plt.figure(2)
    for jidx, j in enumerate(['F1', 'F2', 'F3']):
        _sh = np.nanmean(bk_test[j].values) * 16 / np.nanstd(bk_test[j].values)
        if _sh > 0:
            _mx = avg_drawdown(bk_test[_ny:, j].values) / (16 * np.std(bk_test[_ny:, j].values))
            testh = conv_to_price(bk_test[j].values)
        else:
            _mx = avg_drawdown(-bk_test[_ny:, j].values) / (16 * np.std(bk_test[_ny:, j].values))
            testh = conv_to_price(-bk_test[j].values)
            _sh = -_sh
        print([_sh, _mx])
        plt.subplot(3, 1, jidx + 1)
        plot_ts_new(_dt, testh[_ny:])
    _bk_test = bk_test[_ny:, bk_test.tick_cols()].values
    print(np.corrcoef(_bk_test.T))
    # bk_test3 = bk_test.copy()
    # Scratch comparison of saved runs (bk_test1/2/3 are NOT defined here).
    f1.clear()
    f2.clear()
    plt.subplot(3, 1, 1)
    tmp = bk_test1[:, 'F1'].values
    plot_ts_new(_dt, conv_to_price(tmp)[_ny:])
    print(np.nanmean(tmp)*16/np.nanstd(tmp))
    plt.subplot(3, 1, 2)
    tmp = bk_test2[:, 'F1'].values
    plot_ts_new(_dt, conv_to_price(tmp)[_ny:])
    print(np.nanmean(tmp)*16/np.nanstd(tmp))
    plt.subplot(3, 1, 3)
    tmp = 0.5 * -bk_test3[:, 'F1'].values + 0.5 * bk_test2[:, 'F2'].values
    plot_ts_new(_dt, conv_to_price(tmp)[_ny:])
    print(np.nanmean(tmp) * 16 / np.nanstd(tmp))
'D05S08_005_QRB', 'D01S08_038_QRG', 'D09S29_005_QRG', 'D05S23_349_LRB', 'D10S29_008_QRG', 'D10S14_032_QRG', 'D03S17_129_LRB', 'D02S17_005_QRB', 'D02S11_349_QRG', 'D10S14_012_QRG', 'D04S08_009_QRB', 'D05S14_236_QRG', 'D01S20_070_QRG', 'D10S29_005_QRG', 'D10S11_009_QRG', 'D07S08_005_QRG', 'D05S08_012_QRB', 'D08S29_086_QRG', 'D10S08_007_QRG', 'D09S14_129_QRB', 'D10S08_005_QRG', 'D01S08_008_QRB', 'D04S11_349_QRG', 'D02S14_156_LRB', 'D04S20_236_QRB', 'D01S11_429_QRG', 'D03S17_032_QRB', 'D07S29_005_QRG', 'D10S23_021_QRG', 'D08S20_005_QRG', 'D08S08_007_QRG', 'D09S17_129_QRG', 'D09S08_009_QRG', 'D01S08_017_QRG', 'D01S20_048_QRG', 'D01S08_027_QRG', 'D07S08_012_QRG', 'D06S11_038_QRG', 'D10S08_012_QRG', 'D03S11_349_QRB', 'D07S14_005_QRG', 'D03S20_349_LRB', 'D10S08_236_QRG', 'D02S08_009_QRG', 'D02S26_027_QRB', 'D06S29_005_QRG', 'D04S26_005_QRG', 'D08S17_021_QRG', 'D07S23_015_QRG', 'D09S29_129_QRG', 'D08S26_236_LRB', 'D10S11_005_QRG', 'D09S29_521_QRG'] for i in _eq_idx: # lag the volatility # i = _eq_idx[0] testv = filt.lag(mkt_retrieve(i, 'Stats', 'Volatility')['vol_gk240'].values) # lag the signal test2 = mkt_retrieve(i, 'MovReg', 'Signals')[['Date']+_sig_set_95_lvl] test2.set_columns(['Date']+[k + '_LVL' for k in _sig_set_95_lvl]) test2_ = mkt_retrieve(i, 'MovReg', 'Changes1')[['Date']+_sig_set_95_ch1] test2_.set_columns(['Date']+[k + '_CH1' for k in _sig_set_95_ch1]) test2 = DataFrame.merge(test2, test2_, on='Date') test2_ = mkt_retrieve(i, 'MovReg', 'Changes3')[['Date']+_sig_set_95_ch3] test2_.set_columns(['Date']+[k + '_CH3' for k in _sig_set_95_ch3]) test2 = DataFrame.merge(test2, test2_, on='Date') test2_ = mkt_retrieve(i, 'MovReg', 'Changes5')[['Date']+_sig_set_95_ch5] test2_.set_columns(['Date']+[k + '_CH5' for k in _sig_set_95_ch5])
def check_closest_volatility():
    """Rank volatility estimators by how well they track absolute returns.

    For every estimator in ``vol_names`` and every lookback window, runs a
    median (LAD) regression of pooled absolute 1-day returns on the lagged,
    rescaled volatility forecast -- unconditionally (res0) and conditioned on
    the sign of trailing 30/60/120-day returns (res1-res6) -- and prints one
    tab-separated row of coefficients per estimator/window combination.
    """
    import warnings

    univ_ib_cl = cr_cret.retrieve(univ_ib_ext + 'Close')
    univ_ib_rt1 = univ_ib_cl.copy()
    univ_ib_rt2 = univ_ib_cl.copy()
    univ_ib_rt3 = univ_ib_cl.copy()
    for i in univ_ib_cl.tick_cols():
        # 1-day return, plus lagged trailing returns over 30/60/120 days.
        univ_ib_cl[i] = filt.ret(univ_ib_cl[i].values)
        univ_ib_rt1[i] = filt.lag(filt.ret(univ_ib_rt1[i].values, 30))
        univ_ib_rt2[i] = filt.lag(filt.ret(univ_ib_rt2[i].values, 60))
        univ_ib_rt3[i] = filt.lag(filt.ret(univ_ib_rt3[i].values, 120))

    # Pool every ticker into flat 1-d arrays.
    cl_data = np.abs(univ_ib_cl[univ_ib_cl.tick_cols()].values.reshape(-1))
    rt1_data = univ_ib_rt1[univ_ib_rt1.tick_cols()].values.reshape(-1)
    rt2_data = univ_ib_rt2[univ_ib_rt2.tick_cols()].values.reshape(-1)
    rt3_data = univ_ib_rt3[univ_ib_rt3.tick_cols()].values.reshape(-1)

    # np.sign emits RuntimeWarning on NaN input; silence it for these calls.
    warnings.simplefilter('ignore', RuntimeWarning)
    rt1_data = np.sign(rt1_data)
    rt2_data = np.sign(rt2_data)
    rt3_data = np.sign(rt3_data)
    warnings.simplefilter('default', RuntimeWarning)

    cl_ok = ~np.isnan(cl_data)

    def _fit(vol_data, extra_mask=None):
        # LAD regression of |return| on rescaled volatility over valid rows.
        # NaNs in the sign arrays compare False, so they drop out of the
        # conditional masks exactly as in a direct (rt < 0) test.
        mask = cl_ok & ~np.isnan(vol_data)
        if extra_mask is not None:
            mask = mask & extra_mask
        return simple_quant_reg(cl_data[mask], vol_data[mask] * _norm_mult)

    for j in vol_names:
        for k in range(_min_range, _max_range + _step_range, _step_range):
            # Window suffix is zero-padded below 100 (e.g. '_090' vs '_120').
            name = j + ('_0' if k < 100 else '_') + str(k)
            univ_ib_vol = cr_vol_all.retrieve(univ_ib_ext + name)
            for i in univ_ib_vol.tick_cols():
                univ_ib_vol[i] = filt.lag(univ_ib_vol[i].values)
            vol_data = univ_ib_vol[univ_ib_vol.tick_cols()].values.reshape(-1)

            res = [_fit(vol_data)]                      # res0: unconditional
            for rt in (rt1_data, rt2_data, rt3_data):
                res.append(_fit(vol_data, rt < 0))      # res1-3: after down moves
            for rt in (rt1_data, rt2_data, rt3_data):
                res.append(_fit(vol_data, rt > 0))      # res4-6: after up moves

            out = [name + '\t']
            for r in res:
                out.append(np_to_str(r))
                out.append('\t')
            print(*out)
        # blank separator between estimator families — placement assumed
        # from the flattened source; confirm against original layout
        print('\n')
def check_price_forecast():
    """Score SP500 adjusted-series price forecasts against realised moves.

    Loads every 'AdjSerD<dd>S<ss>' series (days 1-10, smoothing 5-32),
    prints its smart-Kendall correlation with the sign of the 2-day price
    change, then plots cumulative-return curves for series #167 as a
    sign-following strategy.
    """
    univ_ib_dt = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')['Date'].values
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')['SP500'].values
    z = np.where(univ_ib_gd.astype('int') == 1)[0]
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')['SP500'].values[z]
    univ_ib_vl = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')['SP500'].values[z]
    univ_ib_dt = univ_ib_dt[z]

    nlag = 2
    # First index from which both price and volatility are non-NaN.
    n1 = filt.fst_nan(univ_ib_cl)
    n1 = np.maximum(n1, filt.fst_nan(univ_ib_vl))

    sig_rows = []
    names = []
    for days in range(1, 11):
        for smth in range(5, 33):
            # Zero-padded two-digit tags, e.g. days=1, smth=5 -> 'D01S05'.
            d_tag = str(days).zfill(2)
            s_tag = str(smth).zfill(2)
            tmp = cr_aret.retrieve(
                univ_ib_eqidx_ext + 'AdjSerD' + d_tag + 'S' + s_tag)['SP500'].values[z]
            sig_rows.append(tmp)
            names.append(d_tag + '_' + s_tag)
            n1 = np.maximum(n1, filt.fst_nan(tmp))
    univ_ib_sig = np.vstack(sig_rows)

    # Target: sign of the 2-day price change, trimmed to the valid region.
    univ_ib_cl_ = np.sign(filt.chg(univ_ib_cl, nlag))[n1:]
    univ_ib_sig = univ_ib_sig[:, n1:]

    for i, nm in enumerate(names):
        print(nm, ':', smart_kendall(univ_ib_cl_, univ_ib_sig[i, :]))

    plot_ts_new(univ_ib_dt, univ_ib_cl)
    plot_ts_new(univ_ib_dt, univ_ib_sig[167, :])

    # Strategy check on series #167: follow the sign of its lagged return.
    t1 = filt.ret(filt.lag(univ_ib_sig[167, :]))
    t1_ = filt.ret(univ_ib_sig[167, :])
    t2 = filt.ret(univ_ib_cl)
    t3 = t2 * np.sign(t1)
    t1_, t2, t3 = reduce_nonnan(t1_, t2, t3)
    ct1_ = np.cumprod(1 + t1_) * 100   # forecast series itself
    ct2 = np.cumprod(1 + t2) * 100     # buy-and-hold price
    ct3 = np.cumprod(1 - t3) * 100     # strategy (sign-flipped, as original)

    f = pyl.figure(1)
    f.clear()
    pyl.subplot(3, 1, 1)
    pyl.semilogy(ct2[-5000:])
    pyl.subplot(3, 1, 2)
    pyl.semilogy(ct1_[-5000:])
    pyl.subplot(3, 1, 3)
    pyl.semilogy(ct3[-5000:])
def get_smart_measures():
    """Greedily select momentum and mean-reversion signals from 'GBM'.

    Loads the stored signal matrix, lags everything two days, scales by
    volatility and median absolute deviation, scores each signal by its
    smart-Kendall correlation with two return targets, then greedily picks
    up to 20 momentum (most positive combined correlation) and 40 reversal
    (most negative) signals that are pairwise de-correlated (Kendall <= 0.6).
    The selection runs three times: all signals, excluding monthly ('M_')
    signals, and excluding both monthly and weekly ('W_') signals.
    """
    _burn = 5220  # warm-up rows excluded from every statistic

    univ_ib_data = cr_cret.retrieve(univ_ib_eqidx_ext + 'GBM')
    univ_ib_cl = univ_ib_data['Close'].values
    univ_ib_vl = univ_ib_data['Volatility'].values
    del univ_ib_data['Close']
    del univ_ib_data['Volatility']
    new_col_names = list(univ_ib_data.columns)
    univ_ib_data = univ_ib_data.values
    num_col = len(new_col_names)

    # Lag signals and volatility two days so only prior information is used.
    for i in range(num_col):
        univ_ib_data[:, i] = filt.lag(univ_ib_data[:, i], 2)
    univ_ib_vl = filt.lag(univ_ib_vl, 2)

    # Two targets: raw return and two-day average return, both vol-scaled.
    univ_ib_cl1 = univ_ib_cl / univ_ib_vl
    univ_ib_cl2 = ((univ_ib_cl + filt.lag(univ_ib_cl)) / 2) / univ_ib_vl
    for i in range(num_col):
        univ_ib_data[:, i] = univ_ib_data[:, i] / univ_ib_vl

    # Standardise by median absolute deviation over the scoring window.
    for i in range(num_col):
        univ_ib_data[:, i] = univ_ib_data[:, i] / med_abs_dev(univ_ib_data[_burn:, i])
    univ_ib_cl1 = univ_ib_cl1 / med_abs_dev(univ_ib_cl1[_burn:])
    univ_ib_cl2 = univ_ib_cl2 / med_abs_dev(univ_ib_cl2[_burn:])

    # Correlation of each signal with both targets.
    univ_ib_correl1 = np.zeros(num_col)
    univ_ib_correl2 = np.zeros(num_col)
    for i in range(num_col):
        univ_ib_correl1[i] = smart_kendall(univ_ib_data[_burn:, i], univ_ib_cl1[_burn:])
        univ_ib_correl2[i] = smart_kendall(univ_ib_data[_burn:, i], univ_ib_cl2[_burn:])

    def _mask_correl(correl, banned):
        # NaN-out signals whose column name contains any banned tag, so
        # they can never be selected by argmax/argmin.
        return np.array([(v if not any(b in new_col_names[i] for b in banned)
                          else np.nan) for i, v in enumerate(correl)])

    univ_ib_correl1a = _mask_correl(univ_ib_correl1, ('M_',))
    univ_ib_correl2a = _mask_correl(univ_ib_correl2, ('M_',))
    univ_ib_correl1b = _mask_correl(univ_ib_correl1, ('M_', 'W_'))
    univ_ib_correl2b = _mask_correl(univ_ib_correl2, ('M_', 'W_'))

    def _greedy_select(correl, max_n, pick, accept):
        """Pick up to max_n signals best-first.

        pick   -- argmax_fixed (momentum) or argmin_fixed (reversal)
        accept -- threshold test applied to every pick after the first
        A candidate is skipped once its Kendall correlation with any
        already-selected signal exceeds 0.6.
        """
        sig, sig_idx, val = [], [], []
        for i in range(max_n):
            if i == 0:
                # First pick is unconditional, as in the original scheme.
                _idx = pick(correl)
            else:
                avail = np.ones(num_col, dtype=bool)
                for j1 in sig_idx:
                    avail[j1] = False
                for j1 in range(num_col):
                    if not avail[j1]:
                        continue
                    for j2 in sig_idx:
                        if smart_kendall(univ_ib_data[_burn:, j1],
                                         univ_ib_data[_burn:, j2]) > 0.6:
                            avail[j1] = False
                            break
                avail_idx = np.where(avail)[0]
                if avail_idx.shape[0] == 0:
                    break
                _idx = avail_idx[pick(correl[avail_idx])]
                if not accept(correl[_idx]):
                    break
            sig.append(new_col_names[_idx])
            sig_idx.append(_idx)
            val.append(correl[_idx])
            print(i)  # progress indicator
        return sig, val

    for k in range(3):
        if k == 0:
            _correl = univ_ib_correl1 + univ_ib_correl2
        elif k == 1:
            _correl = univ_ib_correl1a + univ_ib_correl2a
        else:
            _correl = univ_ib_correl1b + univ_ib_correl2b

        _mom_sig, _mom_val = _greedy_select(
            _correl, 20, argmax_fixed, lambda v: v > 0.015)
        print(_mom_sig)
        print(_mom_val)

        _rev_sig, _rev_val = _greedy_select(
            _correl, 40, argmin_fixed, lambda v: v < -0.015)
        print(_rev_sig)
        print(_rev_val)