def gradient_boosting_result(_ticker='SP500'):
    """Print median absolute residuals of rolling regressions for every signal.

    For ``_ticker`` the function loads the exchange-open flag, the
    ``vol_pb_120`` volatility series and the close series, keeps only the
    rows where the exchange-open flag equals 1, and then loads one signal
    column per (transform, price, filter, horizon) name combination.

    For each signal column and each lookback in ``[120, 240, 360, 480]`` two
    rolling fits are run through ``qreg.roll_e_ladreg_1d`` (presumably a
    rolling LAD regression -- confirm against ``qreg``): one on the raw
    series and one on the series divided by volatility.  The median absolute
    residual of each fit is collected and the eight numbers are printed per
    signal.

    :param _ticker: column name to read from every retrieved table.
    :return: None (results are only printed).
    """
    exch_open = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')[_ticker].values
    # row positions on which the exchange-open flag is 1
    z = list(np.where(exch_open.astype('int') == 1)[0])

    price_tags = ['CLS', 'A2D', 'A3D', 'A4D', 'A2W', 'A3W', 'A4W', 'A2M', 'A3M', 'A4M']
    transform_tags = ['LVL', 'CH0', 'CH1', 'CH2']
    filter_tags = ['LRB', 'QRB', 'QRG']
    # horizons 5, 10, ..., 300 as zero-padded three-character strings
    horizon_tags = ['%03d' % h for h in range(5, 305, 5)]

    # get volatility forecast
    vol = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')[_ticker].values[z]
    # get return
    ret = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')[_ticker].values[z]
    ret = filt.ret(ret)

    # collect every signal into one frame, one column per name combination
    univ_ib_sig_all = None
    for trn in transform_tags:
        for prc in price_tags:
            for fil in filter_tags:
                for hoz in horizon_tags:
                    col_name = prc + '_' + fil + '_' + trn + '_' + hoz
                    sig_values = cr_sig_mr_sg.retrieve(univ_ib_eqidx_ext + col_name)[_ticker].values[z]
                    new_col = DataFrame({col_name: sig_values})
                    if univ_ib_sig_all is None:
                        univ_ib_sig_all = new_col
                    else:
                        univ_ib_sig_all.col_bind(new_col)

    # just for storage - in case something happens
    # univ_ib_gb = DataFrame({'Close': univ_ib_cl, 'Volatility': univ_ib_vl})
    # univ_ib_gb.col_bind(univ_ib_sig_all)
    # cr_cret.store(univ_ib_eqidx_ext + 'GBM', univ_ib_gb)

    lookbacks = [120, 240, 360, 480]
    for col in list(univ_ib_sig_all.columns):
        summary = np.empty(0)
        for window in lookbacks:
            # signal lagged by one row before being paired with the return
            lagged_sig = filt.lag(univ_ib_sig_all[col].values, 1)
            r, s, v = reduce_nonnan(ret, lagged_sig, vol)
            beta_raw = qreg.roll_e_ladreg_1d(r, s, window)
            beta_scaled = qreg.roll_e_ladreg_1d(r / v, s / v, window)
            # residuals use the previous row's coefficient
            resid_raw = r - filt.lag(beta_raw) * s
            resid_scaled = r - filt.lag(beta_scaled) * s
            resid_raw, resid_scaled = reduce_nonnan(resid_raw, resid_scaled)
            pair = np.array([np.median(abs(resid_raw)), np.median(abs(resid_scaled))])
            summary = np.hstack((summary, pair))
        print(col + ' : ' + np_to_str(summary))
def response_curve(x1, _ticker='SP500', f=None, md=True, sigd=False):
    """Plot binned response curves of return against a lagged signal.

    The signal ``x1`` and the close series of ``_ticker`` are restricted to
    rows where the exchange-open flag equals 1.  Returns (``filt.ret`` of the
    close) and the one-row-lagged signal are both divided by the one-row-lagged
    ``vol_pb_120`` volatility.  For each of 16 panels ``i`` the signal is
    lagged by a further ``i`` rows and the return is replaced by
    ``filt.sma(ret, i + 1)``; the signal axis is cut into 21 bins centred on
    an even grid spanning ``[-_range, _range]`` and a per-bin statistic of the
    return is plotted in a 4x4 subplot grid.

    The first and the last bin are open-ended so that observations beyond the
    1st/99th-percentile range are still counted.  (The original tested
    ``j == _bins + 1`` for the last bin, which can never be true inside
    ``range(0, _bins + 1)``; the test is now ``j == _bins``.)

    :param x1: name suffix of the signal table to retrieve.
    :param _ticker: column name to read from every retrieved table.
    :param f: figure identifier passed to ``pyl.figure``; figure 1 when None.
    :param md: use ``np.nanmedian`` per bin when True, else ``np.nanmean``.
    :param sigd: when True divide the per-bin statistic by the bin's
        ``np.nanstd``.
    :return: None (the curves are drawn on the selected figure).
    """
    exch_open = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')[_ticker].values
    vol = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')[_ticker].values
    sig = cr_sig_mr_sg.retrieve(univ_ib_eqidx_ext + x1)[_ticker].values
    close = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')[_ticker].values

    # keep only rows on which the exchange-open flag is 1
    z = list(np.where(exch_open.astype('int') == 1)[0])
    sig = sig[z]
    close = close[z]
    vol = vol[z]

    # scale both the return and the lagged signal by lagged volatility
    ret_n = filt.ret(close) / filt.lag(vol, 1)
    sig_n = filt.lag(sig, 1) / filt.lag(vol, 1)
    ret_n, sig_n = reduce_nonnan(ret_n, sig_n)

    _bins = 20
    # symmetric range taken from the larger of the 99th / (negated) 1st percentile
    _range = np.maximum(np.percentile(sig_n, 99), -np.percentile(sig_n, 1))
    # half the spacing between neighbouring bin centres
    _delta = _range / _bins
    # bin centres do not depend on the panel, so build them once
    centres = np.linspace(-_range, _range, num=_bins + 1)

    if f is not None:
        pyl.figure(f)
    else:
        pyl.figure(1)

    centre_stat = np.nanmedian if md else np.nanmean

    for i in range(0, 16):
        if i == 0:
            uis1 = sig_n
            uic1 = ret_n
        else:
            uis1 = filt.lag(sig_n, i)
            uic1 = filt.sma(ret_n, i + 1)
        uis1, uic1 = reduce_nonnan(uis1, uic1)

        curve = np.zeros(_bins + 1) * np.nan
        for j in range(0, _bins + 1):
            if j == 0:
                # lowest bin: open-ended below
                members = np.where(uis1 <= centres[j] + _delta)[0]
            elif j == _bins:
                # highest bin: open-ended above
                members = np.where(uis1 > centres[j] - _delta)[0]
            else:
                members = np.where((uis1 <= centres[j] + _delta) &
                                   (uis1 > centres[j] - _delta))[0]
            if members.shape[0] > 0:
                in_bin = uic1[members]
                if sigd:
                    curve[j] = centre_stat(in_bin) / np.nanstd(in_bin)
                else:
                    curve[j] = centre_stat(in_bin)

        pyl.subplot(4, 4, i + 1)
        pyl.plot(centres, curve)
def autocorr(x, m=1):
    """Return the lag-``m`` correlation of ``x`` with itself.

    ``x`` is paired with ``filt.lag(x, m)``, the pair is passed through
    ``reduce_nonnan``, each side is standardised with its own mean and
    (population) standard deviation, and the mean of the product is returned.

    :param x: one-dimensional numeric series.
    :param m: lag passed to ``filt.lag``.
    :return: mean of the product of the two standardised series.
    """
    lagged, current = reduce_nonnan(filt.lag(x, m), np.copy(x))
    z_lagged = (lagged - np.mean(lagged)) / np.std(lagged)
    z_current = (current - np.mean(current)) / np.std(current)
    return np.mean(z_lagged * z_current)
def return_correl(x1, x2, _ticker='SP500'):
    """Return the Spearman rank correlation between two stored signals.

    Both signals are read for ``_ticker``, restricted to rows where the
    exchange-open flag equals 1 and passed jointly through ``reduce_nonnan``
    before ``spearmanr`` is applied.

    :param x1: name suffix of the first signal table.
    :param x2: name suffix of the second signal table.
    :param _ticker: column name to read from every retrieved table.
    :return: the ``correlation`` attribute of the ``spearmanr`` result.
    """
    exch_open = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')[_ticker].values
    first = cr_sig_mr_sg.retrieve(univ_ib_eqidx_ext + x1)[_ticker].values
    second = cr_sig_mr_sg.retrieve(univ_ib_eqidx_ext + x2)[_ticker].values

    # row positions on which the exchange-open flag is 1
    open_rows = list(np.where(exch_open.astype('int') == 1)[0])
    first, second = reduce_nonnan(first[open_rows], second[open_rows])

    rank_corr = spearmanr(first, second)
    return rank_corr.correlation
def get_best_fit(k):
    """Grid-search ``filt.smth_price`` parameters and print improving candidates.

    A candidate ``(n1, n2, f1, f2, lthresh, oshpr)`` is accepted when the
    autocorrelation of the returns of the smoothed series (last 7000 points)
    lies in ``[(k - 1) / (k + 1), k / (k + 2))``.  Accepted candidates are
    scored by an overshoot measure (how far the smoothed series lies outside
    the band spanned by the module-level ``test2p_`` and ``test3p_`` series,
    relative to their sum) multiplied by ``sqrt`` of the parameter sum.  Each
    time the best score improves the corresponding ``metric`` list is printed.
    Once the best score is no longer above 0.0001 no further candidates are
    evaluated.

    :param k: target span; used for the autocorrelation band and the
        mean/median weighting of the overshoot score.
    :return: None (results are only printed).
    """
    # for k in range(5, 16):
    good1k = (k - 1) / (k + 1)
    good1k_ = k / (k + 2)
    print('Looking for EMA %s equivalent with autocorr = %s' % (str(k), str(good1k)))

    tail = slice(-7000, None)  # only the most recent 7000 observations are scored
    good2b = np.nan            # best (lowest) score so far; NaN until the first hit

    for n1 in range(10, 40, 5):
        for n2 in range(5, 50, 5):
            for f1 in range(10, 60, 5):
                for f2 in range(6, 48, 2):
                    for lthresh in range(0, 1, 1):
                        for oshpr in range(7, 30, 1):
                            _loss = n1 + n2 + f1 + f2 + oshpr
                            if _loss > 250:
                                continue
                            # stop evaluating once the best score is small enough
                            if not (np.isnan(good2b) or good2b > 0.0001):
                                continue

                            test1p = filt.smth_price(test3p_, test2p_, n1, n2, f1, f2, lthresh, oshpr)
                            test1p, test2p, test3p = reduce_nonnan(test1p, test2p_, test3p_)
                            good1 = autocorr(filt.ret(test1p)[tail])
                            if not (good1k <= good1 < good1k_):
                                continue

                            # get the overshoot measure
                            above = test1p - np.maximum(test2p, test3p)
                            below = test1p - np.minimum(test2p, test3p)
                            overshoot = np.abs(
                                200 * ((above > 0) * above + (below < 0) * below) / (test2p + test3p))
                            good3 = (np.mean(overshoot[tail]) * 2 / (k + 1)
                                     + np.median(overshoot[tail]) * (k - 1) / (k + 1))
                            good2 = good3 * np.sqrt(_loss)

                            # first hit, or an improvement on the best score
                            if np.isnan(good2b) or good2 < good2b:
                                metric = [k, n1, n2, f1, f2, lthresh, oshpr, good1, good3, _loss, good2]
                                good2b = good2
                                print(metric)
    return None
def spline_curve(x1, _ticker='SP500', f=None, x1_=None, x2_=None, scat=True):
    """Fit and plot a cubic response of return on a lagged signal.

    The signal ``x1`` and the close series of ``_ticker`` are restricted to
    rows where the exchange-open flag equals 1; returns and the one-row-lagged
    signal are both divided by one-row-lagged ``vol_pb_120`` volatility.  The
    Spearman correlation of the pair is printed, ``rpf2.cubic_fit_linreg`` is
    called with equal observation weights and two knots, and the resulting
    curve is drawn over ``[-1, 1]`` together with a zero line and, optionally,
    a scatter of the data.

    :param x1: name suffix of the signal table to retrieve.
    :param _ticker: column name to read from every retrieved table.
    :param f: accepted but not used by this function.
    :param x1_: lower knot; the 25th percentile of the signal when None.
    :param x2_: upper knot; the 75th percentile of the signal when None.
    :param scat: when True also scatter-plot the observations.
    :return: None.
    """
    exch_open = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')[_ticker].values
    vol = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')[_ticker].values
    sig = cr_sig_mr_sg.retrieve(univ_ib_eqidx_ext + x1)[_ticker].values
    close = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')[_ticker].values

    # keep only rows on which the exchange-open flag is 1
    open_rows = list(np.where(exch_open.astype('int') == 1)[0])
    sig = sig[open_rows]
    close = close[open_rows]
    vol = vol[open_rows]

    # scale both the return and the lagged signal by lagged volatility
    ret_n = filt.ret(close) / filt.lag(vol, 1)
    sig_n = filt.lag(sig, 1) / filt.lag(vol, 1)
    ret_n, sig_n = reduce_nonnan(ret_n, sig_n)
    print(spearmanr(ret_n, sig_n).correlation)

    n_obs = ret_n.shape[0]
    test_weight = np.ones(n_obs) / n_obs

    # knots default to the inter-quartile range of the scaled signal
    x1 = np.percentile(sig_n, 25) if x1_ is None else x1_
    x2 = np.percentile(sig_n, 75) if x2_ is None else x2_

    testa = rpf2.cubic_fit_linreg(ret_n, sig_n, x1, x2, test_weight)
    b0, b1, b2, b3, a0, c0 = (testa[idx] for idx in range(6))

    x_ = np.linspace(-1, 1, num=101)
    lower = x_ < x1
    upper = x_ > x2
    # NOTE(review): the a0 / c0 contributions below appear to cancel
    # algebraically (a0 - 3*a0 + 3*a0 - a0), leaving a plain cubic in x_ --
    # confirm this is the intended form of the knot correction.
    y_ = (b0 + a0 * (x_ < x1) + c0 * (x_ > x2)) \
        + (b1 - 3 * (a0 / x_) * lower - 3 * (c0 / x_) * upper) * x_ \
        + (b2 + 3 * (a0 / (x_ ** 2)) * lower + 3 * (c0 / (x_ ** 2)) * upper) * (x_ ** 2) \
        + (b3 - (a0 / (x_ ** 3)) * lower - (c0 / (x_ ** 3)) * upper) * (x_ ** 3)
    # x_ contains 0, where the divisions above yield NaN; use the intercept there
    y_[np.isnan(y_)] = b0

    zero_line = np.zeros(len(y_))
    pyl.plot(x_, y_)
    pyl.plot(x_, zero_line)
    if scat:
        pyl.scatter(sig_n, ret_n, c='c')
    return None
univ_ib_vol5[ticker] = filt.lag(univ_ib_vol5[ticker].values) univ_ib_vol6[ticker] = filt.lag(univ_ib_vol6[ticker].values) univ_ib_vol7[ticker] = filt.lag(univ_ib_vol7[ticker].values) univ_ib_dt_ = univ_ib_cl['Date'].values univ_ib_cl_ = univ_ib_cl[ticker].values univ_ib_vol0_ = univ_ib_vol0[ticker].values univ_ib_vol1_ = univ_ib_vol1[ticker].values univ_ib_vol2_ = univ_ib_vol2[ticker].values univ_ib_vol3_ = univ_ib_vol3[ticker].values univ_ib_vol4_ = univ_ib_vol4[ticker].values univ_ib_vol5_ = univ_ib_vol5[ticker].values univ_ib_vol6_ = univ_ib_vol6[ticker].values univ_ib_vol7_ = univ_ib_vol7[ticker].values univ_ib_cl_, univ_ib_vol0_, univ_ib_vol1_, univ_ib_vol2_, univ_ib_vol3_, univ_ib_vol4_, univ_ib_vol5_, univ_ib_vol6_, univ_ib_vol7_ = reduce_nonnan( univ_ib_cl_, univ_ib_vol0_, univ_ib_vol1_, univ_ib_vol2_, univ_ib_vol3_, univ_ib_vol4_, univ_ib_vol5_, univ_ib_vol6_, univ_ib_vol7_) # univ_ib_cl_, univ_ib_vol3_ = reduce_nonnan(univ_ib_cl_, univ_ib_vol3_) # univ_ib_cl_, univ_ib_vol3_, univ_ib_dt_ = reduce_nonnan(univ_ib_cl_, univ_ib_vol3_, univ_ib_dt_) # univ_ib_cl_, univ_ib_vol0_, univ_ib_vol3_, univ_ib_dt_ = reduce_nonnan(univ_ib_cl_, univ_ib_vol0_, univ_ib_vol3_, univ_ib_dt_) univ_ib_vol_ = [univ_ib_vol0_, univ_ib_vol1_, univ_ib_vol2_, univ_ib_vol3_, univ_ib_vol4_, univ_ib_vol5_, univ_ib_vol6_, univ_ib_vol7_] # univ_ib_vol_ = [univ_ib_vol3_] # univ_ib_vol_ = [univ_ib_vol0_, univ_ib_vol3_] for reg_lookback in [120]: # [120, 240, 360] # reg_lookback = 120 # print('reg_lookback is %s\n' % str(reg_lookback)) # f1, f2, f3, f4 = [], [], [], [] for i in range(0, len(univ_ib_vol_)): # i = 1
def autocorr(x, m=1):
    """Return ``smart_kendall`` of ``x`` against its own lag-``m`` copy.

    ``x`` is paired with ``filt.lag(x, m)`` and the pair is passed through
    ``reduce_nonnan`` before being handed to ``smart_kendall``.

    :param x: one-dimensional numeric series.
    :param m: lag passed to ``filt.lag``.
    :return: whatever ``smart_kendall`` returns for the (lagged, current) pair.
    """
    lagged, current = reduce_nonnan(filt.lag(x, m), np.copy(x))
    return smart_kendall(lagged, current)
# n2 = 100 for f1 in range(10, 300, 10): #range(10, 300, 10): # range(2, 100, 2): # f1 = 200 for f2 in range(6, 36, 2): # range(2, 36, 2): # f2 = 28 for lthresh in range(0, 2, 1): #[1]: # lthresh = 1 for oshpr in range(13, 16, 3): # oshpr = 28 if (np.isnan(good2b) | (~np.isnan(good2b) & (good2b > 0.0001))) & (n1+n2+f1+f2 < 180): # print(n1, n2, f1, f2, lthresh, oshpr) test1p = spec_smth(univ_ib_op[_ticker].values, univ_ib_cl[_ticker].values, n1, n2, f1, f2, lthresh, oshpr) test2p = univ_ib_cl[_ticker].values test3p = univ_ib_op[_ticker].values testkp = np.copy(testk) test1p, test2p, test3p, testkp = reduce_nonnan(test1p, test2p, test3p, testkp) test1r = filt.ret(test1p) testkr = filt.ret(testkp) test1r, testkr = reduce_nonnan(test1r, testkr) # good1 = autocorr(test1r)-autocorr(test2r) good1 = autocorr(test1r[-7000:]) good1k = autocorr(testkr[-7000:]) # get the overshoot measure xoc_mx = test1p-np.maximum(test2p, test3p) xoc_mn = test1p-np.minimum(test2p, test3p) good2_ = np.abs(200*((xoc_mx>0)*xoc_mx+(xoc_mn<0)*xoc_mn)/(test2p+test3p)) # good2 = np.mean(good2_[-7000:]) good2 = np.median(good2_[-7000:])
_hypsecant_lrb = 0 _laplace_lrb = 0 _cauchy_lrb = 0 _logistic_lrb = 0 _hypsecant_qrb = 0 _laplace_qrb = 0 _cauchy_qrb = 0 _logistic_qrb = 0 _hypsecant_qrg = 0 _laplace_qrg = 0 _cauchy_qrg = 0 _logistic_qrg = 0 for i in tick_cols: print("Processing %s " % i) tmp_ = best_fit_distribution(reduce_nonnan(filt.chg(tmp[i].values, 3))[0]) if "_LRB" in i: if tmp_[0] == "hypsecant": _hypsecant_lrb += 1 if tmp_[0] == "laplace": _laplace_lrb += 1 if tmp_[0] == "cauchy": _cauchy_lrb += 1 if tmp_[0] == "logistic": _logistic_lrb += 1 print("LRB", _hypsecant_lrb, _laplace_lrb, _cauchy_lrb, _logistic_lrb) if "_QRB" in i: if tmp_[0] == "hypsecant": _hypsecant_qrb += 1 if tmp_[0] == "laplace": _laplace_qrb += 1