def return_stats_for_various_vol():
    """Print Spearman rank correlations between lagged volatility and absolute returns.

    For every estimator name in ``vol_names`` and every lookback in
    30, 60, ..., 300 the stored frame ``<name>_<lookback>D`` (lookbacks below
    100 carry a leading zero, e.g. ``_030D``) is retrieved from ``cr_vol_all``,
    each ticker column is passed through ``filt.lag`` and the frame is
    flattened.  It is then rank-correlated with the flattened absolute
    ``filt.ret`` of the close prices, using only observations where both values
    are non-NaN and the lagged 27-step ``filt.ret`` of the close is negative.

    One ``<name>_<lookback>D:<tab><rho>`` line is printed per frame; nothing is
    returned.
    """
    import warnings

    univ_ib_cl = cr_cret.retrieve(univ_ib_ext + "Close")
    univ_ib_rt1 = univ_ib_cl.copy()
    for i in univ_ib_cl.tick_cols():
        univ_ib_cl[i] = filt.ret(univ_ib_cl[i].values)
    for i in univ_ib_rt1.tick_cols():
        univ_ib_rt1[i] = filt.ret(univ_ib_rt1[i].values, 27)
        univ_ib_rt1[i] = filt.lag(univ_ib_rt1[i].values)

    univ_ib_cl_data = np.abs(univ_ib_cl[univ_ib_cl.tick_cols()].values.reshape(-1))
    # Silence RuntimeWarnings only while taking the sign, and restore the
    # caller's warning filters afterwards instead of forcing "default".
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        univ_ib_rt1_data = np.sign(univ_ib_rt1[univ_ib_rt1.tick_cols()].values.reshape(-1))

    for j in vol_names:
        for k in range(30, 330, 30):
            # Exactly one key per lookback: "_0<k>D" below 100, "_<k>D" otherwise.
            label = j + ("_0" if k < 100 else "_") + str(k) + "D"
            univ_ib_vol = cr_vol_all.retrieve(univ_ib_ext + label)
            for i in univ_ib_vol.tick_cols():
                univ_ib_vol[i] = filt.lag(univ_ib_vol[i].values)
            univ_ib_vol_data = univ_ib_vol[univ_ib_vol.tick_cols()].values.reshape(-1)

            # Unconditional variant: drop the (univ_ib_rt1_data < 0) term.
            univ_ib_nn = ~np.isnan(univ_ib_cl_data) & ~np.isnan(univ_ib_vol_data) & (univ_ib_rt1_data < 0)

            univ_ib_vol_data = univ_ib_vol_data[univ_ib_nn]
            univ_ib_cl_data_ = univ_ib_cl_data[univ_ib_nn]
            rho = ss.spearmanr(univ_ib_vol_data, univ_ib_cl_data_).correlation
            print(label + ":\t", rho)
def gradient_boosting_result(_ticker='SP500'):
    """Print median absolute residuals of rolling regressions of returns on each stored signal.

    All series are restricted to the rows where the ``ExchOpen`` column of
    ``_ticker`` equals 1.  Every signal named ``<prc>_<fil>_<trn>_<hoz>``
    (price type, filter, transform, horizon label '005'..'300') is loaded from
    ``cr_sig_mr_sg`` and collected in one frame.  For each signal and each
    lookback in ``reg_lookback`` two rolling coefficients are fitted with
    ``qreg.roll_e_ladreg_1d`` -- on the raw series and on the series divided by
    the ``vol_pb_120`` volatility -- and the median absolute residual of each
    fit is printed.

    :param _ticker: column name of the market to analyse.
    :return: None; results are printed, one line per signal.
    """
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')[_ticker].values
    z = list(np.where(univ_ib_gd.astype('int') == 1)[0])

    _prc_names = ['CLS', 'A2D', 'A3D', 'A4D', 'A2W', 'A3W', 'A4W', 'A2M', 'A3M', 'A4M']
    _trn_names = ['LVL', 'CH0', 'CH1', 'CH2']
    _fil_names = ['LRB', 'QRB', 'QRG']
    # Horizon labels are zero-padded to three characters; horizons of 100 and
    # above must be included too.
    _hoz_names = [str(j).zfill(3) for j in range(5, 305, 5)]

    # get volatility forecast
    univ_ib_vl = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')[_ticker].values[z]

    # get return
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')[_ticker].values[z]
    univ_ib_cl = filt.ret(univ_ib_cl)

    univ_ib_sig_all = None
    for k in _trn_names:
        for i in _prc_names:
            for j in _fil_names:
                for m in _hoz_names:
                    col = i+'_'+j+'_'+k+'_'+m
                    univ_ib_sig = cr_sig_mr_sg.retrieve(univ_ib_eqidx_ext+col)[_ticker].values[z]
                    new_col = DataFrame({col: univ_ib_sig})
                    # The first signal creates the frame, every later one is
                    # appended as a new column.
                    if univ_ib_sig_all is None:
                        univ_ib_sig_all = new_col
                    else:
                        univ_ib_sig_all.col_bind(new_col)

    # just for storage - in case something happens
    # univ_ib_gb = DataFrame({'Close': univ_ib_cl, 'Volatility': univ_ib_vl})
    # univ_ib_gb.col_bind(univ_ib_sig_all)
    # cr_cret.store(univ_ib_eqidx_ext + 'GBM', univ_ib_gb)

    reg_lookback = [120, 240, 360, 480]
    new_col_names = list(univ_ib_sig_all.columns)

    for i in new_col_names:
        test_ = np.empty(0)
        for j in reg_lookback:
            uic = univ_ib_cl
            uiv = univ_ib_vl
            uis = filt.lag(univ_ib_sig_all[i].values, 1)
            uic, uis, uiv = reduce_nonnan(uic, uis, uiv)
            b1 = qreg.roll_e_ladreg_1d(uic, uis, j)
            b2 = qreg.roll_e_ladreg_1d(uic/uiv, uis/uiv, j)

            # Residuals use the coefficient lagged by filt.lag.
            resid1 = uic - filt.lag(b1)*uis
            resid2 = uic - filt.lag(b2)*uis
            resid1, resid2 = reduce_nonnan(resid1, resid2)
            test_ = np.hstack((test_, np.array([np.median(abs(resid1)), np.median(abs(resid2))])))
        print(i+' : '+np_to_str(test_))
def response_curve(x1, _ticker='SP500', f=None, md=True, sigd=False):
    """Plot binned response curves of volatility-scaled returns against a lagged signal.

    The signal ``x1`` (key suffix in ``cr_sig_mr_sg``), the close price and the
    ``vol_pb_120`` volatility of ``_ticker`` are restricted to rows where
    ``ExchOpen`` equals 1.  Returns (``filt.ret``) and the one-step lagged
    signal are both divided by the one-step lagged volatility.

    Sixteen panels are drawn on a 4x4 grid.  Panel ``i`` pairs the signal
    lagged by a further ``i`` steps with ``filt.sma(returns, i + 1)`` (panel 0
    uses both series unchanged).  The signal axis spans ``[-_range, _range]``,
    where ``_range`` is the larger of the 99th percentile and the negated 1st
    percentile, and carries ``_bins + 1`` bin centres.  Interior bins collect
    points within ``_delta`` of the centre; the two outermost bins are
    open-ended.

    :param x1: name of the signal to load.
    :param _ticker: column name of the market.
    :param f: optional figure identifier; see the NOTE in the body.
    :param md: use the median per bin if True, else the mean.
    :param sigd: if True divide the bin statistic by the bin's standard deviation.
    :return: None.
    """
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')[_ticker].values
    univ_ib_vl = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')[_ticker].values
    univ_ib_s1 = cr_sig_mr_sg.retrieve(univ_ib_eqidx_ext + x1)[_ticker].values
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')[_ticker].values
    z = list(np.where(univ_ib_gd.astype('int') == 1)[0])
    univ_ib_s1 = univ_ib_s1[z]
    univ_ib_cl = univ_ib_cl[z]
    univ_ib_vl = univ_ib_vl[z]

    univ_ib_cl = filt.ret(univ_ib_cl)/filt.lag(univ_ib_vl, 1)
    univ_ib_s1 = filt.lag(univ_ib_s1, 1)/filt.lag(univ_ib_vl, 1)

    univ_ib_cl, univ_ib_s1 = reduce_nonnan(univ_ib_cl, univ_ib_s1)

    _bins = 20
    _range = np.maximum(np.percentile(univ_ib_s1, 99), -np.percentile(univ_ib_s1, 1))
    _delta = _range/_bins
    if f is not None:
        # NOTE(review): the body of this branch was missing from the file.
        # Selecting figure `f` is a reconstruction -- confirm against history.
        pyl.figure(f)
    # Bin statistic is fixed for the whole call.
    centre = np.nanmedian if md else np.nanmean
    for i in range(0, 16):
        if i == 0:
            uis1 = univ_ib_s1
            uic1 = univ_ib_cl
        else:
            uis1 = filt.lag(univ_ib_s1, i)
            uic1 = filt.sma(univ_ib_cl, i+1)
        uis1, uic1 = reduce_nonnan(uis1, uic1)
        uis1_b = np.linspace(-_range, _range, num=_bins+1)
        uic1_b = np.full(_bins+1, np.nan)
        for j in range(0, _bins+1):
            if j == 0:
                # lowest bin: everything below the first centre + delta
                tmp__ = np.where(uis1 <= uis1_b[j]+_delta)[0]
            elif j == _bins:
                # highest bin: everything above the last centre - delta
                # (the original tested j == _bins+1, which range() never yields)
                tmp__ = np.where(uis1 > uis1_b[j]-_delta)[0]
            else:
                tmp__ = np.where((uis1 <= uis1_b[j]+_delta) & (uis1 > uis1_b[j]-_delta))[0]
            if tmp__.shape[0] > 0:
                if not sigd:
                    uic1_b[j] = centre(uic1[tmp__])
                else:
                    uic1_b[j] = centre(uic1[tmp__])/np.nanstd(uic1[tmp__])
        pyl.subplot(4, 4, i+1)
        pyl.plot(uis1_b, uic1_b)
def get_best_fit(k):
    """Grid-search ``filt.smth_price`` settings whose return autocorrelation matches an EMA of span ``k``.

    A candidate (n1, n2, f1, f2, lthresh, oshpr) is evaluated only while its
    parameter sum is at most 250 and the best score so far is still NaN or
    above 0.0001.  It qualifies when the autocorrelation of the last 7000
    returns of the smoothed price lies in ``[(k-1)/(k+1), k/(k+2))``.  The
    score is an overshoot measure relative to the module-level ``test2p_`` /
    ``test3p_`` series, multiplied by the square root of the parameter sum;
    the lowest score wins.

    The best candidate is tracked in a local list only; the function returns
    ``None``.
    """
    ac_floor = (k - 1) / (k + 1)
    ac_ceil = k / (k + 2)
    print('Looking for EMA %s equivalent with autocorr = %s' % (str(k), str(ac_floor)))

    best_score = np.nan
    for n1 in range(10, 40, 5):
        for n2 in range(5, 50, 5):
            for f1 in range(10, 60, 5):
                for f2 in range(6, 48, 2):
                    for lthresh in range(0, 1, 1):
                        for oshpr in range(7, 30, 1):
                            _loss = n1 + n2 + f1 + f2 + oshpr
                            if not (_loss <= 250):
                                continue
                            # keep searching only until the score is essentially zero
                            still_searching = np.isnan(best_score) | (~np.isnan(best_score) & (best_score > 0.0001))
                            if not still_searching:
                                continue

                            smoothed = filt.smth_price(test3p_, test2p_, n1, n2, f1, f2, lthresh, oshpr)
                            smoothed, ref_a, ref_b = reduce_nonnan(smoothed, test2p_, test3p_)

                            ac = autocorr(filt.ret(smoothed)[-7000:])
                            if not ((ac >= ac_floor) & (ac < ac_ceil)):
                                continue

                            # overshoot: how far the smoothed price leaves the
                            # band spanned by the two reference series
                            above = smoothed-np.maximum(ref_a, ref_b)
                            below = smoothed-np.minimum(ref_a, ref_b)
                            overshoot = np.abs(200*((above>0)*above+(below<0)*below)/(ref_a+ref_b))
                            recent = overshoot[-7000:]
                            raw_score = np.mean(recent)*2/(k+1)+np.median(recent)*(k-1)/(k+1)
                            score = raw_score*np.sqrt(_loss)

                            if np.isnan(best_score) or score < best_score:
                                metric = [k, n1, n2, f1, f2, lthresh, oshpr, ac, raw_score, _loss, score]
                                best_score = score
    return None
def pred_stats_for_various_vol():
    """Print Spearman rank correlations between changes in lagged inverse volatility and returns.

    For every estimator name in ``vol_names_sm`` and every lookback in
    30, 60, ..., 300 the stored frame ``<name>_<lookback>D`` (lookbacks below
    100 carry a leading zero) is retrieved from ``cr_vol_all``.  Each ticker
    column is replaced by ``filt.chg(filt.lag(1 / vol))``, the frame is
    flattened and rank-correlated with the flattened signed ``filt.ret`` of the
    close prices over all observations where both are non-NaN.

    One ``<name>_<lookback>D:<tab><rho>`` line is printed per frame; nothing is
    returned.
    """
    import warnings

    univ_ib_cl = cr_cret.retrieve(univ_ib_ext + "Close")
    univ_ib_rt = univ_ib_cl.copy()
    for i in univ_ib_cl.tick_cols():
        univ_ib_cl[i] = filt.ret(univ_ib_cl[i].values)
    for i in univ_ib_rt.tick_cols():
        univ_ib_rt[i] = filt.ret(univ_ib_rt[i].values, 30)
        univ_ib_rt[i] = filt.lag(univ_ib_rt[i].values)
    univ_ib_cl_data = univ_ib_cl[univ_ib_cl.tick_cols()].values.reshape(-1)
    # univ_ib_cl_data = np.abs(univ_ib_cl_data)

    # Sign of the lagged 30-step return; only needed for the optional
    # conditioning term noted below.  Warning filters are restored afterwards
    # instead of being reset to "default" globally.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        univ_ib_rt_data = np.sign(univ_ib_rt[univ_ib_rt.tick_cols()].values.reshape(-1))

    for j in vol_names_sm:
        for k in range(30, 330, 30):
            # Exactly one key per lookback: "_0<k>D" below 100, "_<k>D" otherwise.
            label = j + ("_0" if k < 100 else "_") + str(k) + "D"
            univ_ib_vol = cr_vol_all.retrieve(univ_ib_ext + label)
            for i in univ_ib_vol.tick_cols():
                univ_ib_vol[i] = filt.chg(filt.lag(1 / univ_ib_vol[i].values))
            univ_ib_vol_data = univ_ib_vol[univ_ib_vol.tick_cols()].values.reshape(-1)

            # Conditional variant: append  & (univ_ib_rt_data < 0)
            univ_ib_nn = ~np.isnan(univ_ib_cl_data) & ~np.isnan(univ_ib_vol_data)

            univ_ib_vol_data = univ_ib_vol_data[univ_ib_nn]
            univ_ib_cl_data_ = univ_ib_cl_data[univ_ib_nn]
            rho = ss.spearmanr(univ_ib_vol_data, univ_ib_cl_data_).correlation
            print(label + ":\t", rho)
def check_pos_vs_neg_carry():
    """Print mean/std ratios (times 16) of SP500 returns levered by two volatility estimates.

    Leverage is ``0.15 / vol`` with the volatility taken from the stored
    ``vol_rs_zlf_zdt_300D`` and ``vol_cc_zlt_zdt_300D`` frames respectively.
    Rows where either levered return is NaN are dropped from both series
    before the two ratios are printed (first the ``rs`` one, then the ``cc``
    one).  The factor 16 is presumably sqrt(256) annualisation -- confirm.
    """
    frame = cr_cret.retrieve(univ_ib_ext + "Close")[["Date", "SP500"]]
    vol_rs = cr_vol_all.retrieve(univ_ib_ext + "vol_rs_zlf_zdt_300D")[["Date", "SP500"]]
    vol_cc = cr_vol_all.retrieve(univ_ib_ext + "vol_cc_zlt_zdt_300D")[["Date", "SP500"]]

    frame["Return"] = filt.ret(frame["SP500"].values)
    # leverage columns first, then the levered returns, as in the stored layout
    for lev_col, vol_frame in (("Lev1", vol_rs), ("Lev2", vol_cc)):
        frame[lev_col] = 0.15 / vol_frame["SP500"].values
    for ret_col, lev_col in (("DyRet1", "Lev1"), ("DyRet2", "Lev2")):
        frame[ret_col] = frame["Return"] * frame[lev_col]

    levered_rs = frame["DyRet1"].values
    levered_cc = frame["DyRet2"].values
    both_present = ~(np.isnan(levered_rs) | np.isnan(levered_cc))
    for series in (levered_rs[both_present], levered_cc[both_present]):
        print(16 * np.mean(series) / np.std(series))
def spline_curve(x1, _ticker='SP500', f=None, x1_=None, x2_=None, scat=True):
    # Fit a cubic curve of volatility-scaled returns on a lagged, volatility-
    # scaled signal via rpf2.cubic_fit_linreg and plot it on x in [-1, 1],
    # optionally over a scatter of the data.
    #
    #   x1      : signal key suffix looked up in cr_sig_mr_sg (the name is
    #             re-used further down for the lower knot)
    #   _ticker : column name of the market
    #   f       : not referenced anywhere in the visible body
    #   x1_/x2_ : optional knot positions passed to the fit
    #   scat    : if True, scatter the data points as well
    #
    # NOTE(review): this block does not parse as it stands -- see the notes at
    # the knot selection and at the `y_` expression below.
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')[_ticker].values
    univ_ib_vl = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')[_ticker].values
    univ_ib_s1 = cr_sig_mr_sg.retrieve(univ_ib_eqidx_ext + x1)[_ticker].values
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')[_ticker].values
    # keep only rows where the ExchOpen flag is 1
    z = list(np.where(univ_ib_gd.astype('int') == 1)[0])
    univ_ib_s1 = univ_ib_s1[z]
    univ_ib_cl = univ_ib_cl[z]
    univ_ib_vl = univ_ib_vl[z]

    # both the return and the lagged signal are divided by the lagged volatility
    univ_ib_cl = filt.ret(univ_ib_cl) / filt.lag(univ_ib_vl, 1)
    univ_ib_s1 = filt.lag(univ_ib_s1, 1) / filt.lag(univ_ib_vl, 1)

    univ_ib_cl, univ_ib_s1 = reduce_nonnan(univ_ib_cl, univ_ib_s1)

    print(spearmanr(univ_ib_cl, univ_ib_s1).correlation)

    # equal observation weights summing to one
    test_weight = np.ones(univ_ib_cl.shape[0]) / univ_ib_cl.shape[0]

    # NOTE(review): an `else:` appears to be missing in both branches below.
    # As written, the percentile default is immediately overwritten by x1_/x2_
    # (i.e. None), and when a knot IS supplied nothing is assigned: x1 keeps
    # the signal name and x2 is undefined.
    if x1_ is None:
        x1 = np.percentile(univ_ib_s1, 25)
        x1 = x1_
    if x2_ is None:
        x2 = np.percentile(univ_ib_s1, 75)
        x2 = x2_
    testa = rpf2.cubic_fit_linreg(univ_ib_cl, univ_ib_s1, x1, x2, test_weight)
    x_ = np.linspace(-1, 1, num=101)
    # fitted coefficients, in the order returned by cubic_fit_linreg
    b0 = testa[0]
    b1 = testa[1]
    b2 = testa[2]
    b3 = testa[3]
    a0 = testa[4]
    c0 = testa[5]
    # NOTE(review): the expression below ends in a line continuation whose
    # following line(s) are missing (b2 and b3 are never used), so it runs into
    # the next statement and is a syntax error.  The missing higher-order terms
    # cannot be recovered from the visible code.
    # The assignment after it replaces NaN entries of y_ with b0.
    y_ = (b0+a0*(x_<x1)+c0*(x_>x2))+(b1-3*(a0/x_)*(x_<x1)-3*(c0/x_)*(x_>x2))*x_+\
    y_[np.where(np.isnan(y_))[0]] = b0
    # zero line for reference
    z_ = np.zeros(len(y_))
    pyl.plot(x_, y_)
    pyl.plot(x_, z_)
    if scat:
        pyl.scatter(univ_ib_s1, univ_ib_cl, c='c')
    return None
def test_measures():
    """Print rank correlations between SP500 returns and every stored signal at lags 1 and 2.

    Signals are named ``<prc>_<fil>_<trn>_<hoz>`` (price type, filter,
    transform, horizon label '005'..'300') and are loaded from
    ``cr_sig_mr_sg``.  All series are restricted to rows where ``ExchOpen`` is
    1 and then to the last 2780 observations.  For each signal four Spearman
    correlations (times 100) are printed: returns vs. signal lagged 1 and 2,
    and sign of returns vs. sign of signal lagged 1 and 2.

    :return: None.
    """
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')
    _prc_names = ['CLS', 'A2D', 'A3D', 'A4D', 'A2W', 'A3W', 'A4W', 'A2M', 'A3M', 'A4M']
    _trn_names = ['LVL', 'CH0', 'CH1', 'CH2']
    _fil_names = ['LRB', 'QRB', 'QRG']
    # Horizon labels zero-padded to three characters.  The branch bodies were
    # missing here; rebuilt to match the identical construct in
    # gradient_boosting_result, which loads the same signal keys.
    _hoz_names = [str(j).zfill(3) for j in range(5, 305, 5)]

    _ticker = 'SP500'

    z = list(np.where(univ_ib_gd[_ticker].values.astype('int') == 1)[0])
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')[_ticker].values[z]
    univ_ib_cl = filt.ret(univ_ib_cl)
    univ_ib_cl = univ_ib_cl[-2780:]
    # loop-invariant: sign of the return series
    univ_ib_cl_sign = np.sign(univ_ib_cl)

    for i in _prc_names:
        for j in _fil_names:
            for k in _trn_names:
                for m in _hoz_names:
                    fn = univ_ib_eqidx_ext+i+'_'+j+'_'+k+'_'+m
                    univ_ib_sig = cr_sig_mr_sg.retrieve(fn)[_ticker].values[z]
                    # lag first, then cut to the evaluation window
                    univ_ib_sig1 = filt.lag(univ_ib_sig)[-2780:]
                    univ_ib_sig2 = filt.lag(univ_ib_sig, 2)[-2780:]
                    t1 = spearmanr(univ_ib_cl, univ_ib_sig1).correlation
                    t2 = spearmanr(univ_ib_cl, univ_ib_sig2).correlation
                    d1 = spearmanr(univ_ib_cl_sign, np.sign(univ_ib_sig1)).correlation
                    d2 = spearmanr(univ_ib_cl_sign, np.sign(univ_ib_sig2)).correlation
                    td = np.array([t1, t2, d1, d2])*100
                    print(i+','+j+','+k+','+m+' :\t', np_to_str(td))
    return None
def refresh_correl(_window=240):
    """Compute rolling pairwise Kendall tau of volatility-scaled returns and store the result.

    Close prices are converted with ``filt.ret`` and divided by the stored
    ``vol_gk240`` series.  For every unordered pair of ticker columns a column
    ``<tickA>_<tickB>`` is added to a frame created by ``dummy_df``.  Row ``t``
    holds ``bst.kendall_tau`` over the ``_window`` observations preceding ``t``
    (row ``t`` itself is excluded by the slice).  Rows before
    ``_window + max(filt.fst_nan(a), filt.fst_nan(b))`` stay NaN.  The frame is
    stored under ``<univ_ib_eqidx_ext>Correl`` in ``cr_cret``.

    :param _window: number of observations per correlation window.
    :return: None.
    """
    _cls = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')
    _vol = cr_cret.retrieve(univ_ib_eqidx_ext + 'vol_gk240')
    tcl = _cls.tick_cols()
    for i in tcl:
        _cls[i] = filt.ret(_cls[i].values)/_vol[i].values
    _corr = dummy_df(_cls)
    n = _corr.shape[0]
    # need to make this code parallel
    for j in range(0, len(tcl)-1):
        for k in range(j+1, len(tcl)):
            pair = tcl[j]+'_'+tcl[k]
            print('Processing rolling correlation of %s and %s' % (tcl[j], tcl[k]))
            _corr[pair] = np.full(n, np.nan)
            _tmp1 = _cls[tcl[k]].values
            _tmp2 = _cls[tcl[j]].values
            ny1 = np.maximum(filt.fst_nan(_tmp1), filt.fst_nan(_tmp2))
            for _lbck in range(_window + ny1, n):
                # The second argument was cut off in the file; both series must
                # cover the same window.
                _corr[_lbck, pair] = bst.kendall_tau(_tmp1[_lbck-_window:_lbck],
                                                     _tmp2[_lbck-_window:_lbck])
    cr_cret.store(univ_ib_eqidx_ext+'Correl', _corr)
    return None
def get_vol_adj():
    """Rescale every stored volatility series by a rolling regression coefficient and store it.

    For each estimator index in ``range(len(vol_list))`` (named by
    ``vol_names``) and each lookback in ``vol_lookbacks`` the frame
    ``<name>_<lookback>`` (lookbacks below 100 carry a leading zero) is read
    from ``cr_vol_all``.  Per ticker, on the rows where ``ExchOpen`` is 1:

    * absolute ``filt.ret`` of the close is regressed on the lagged volatility
      with ``qreg4.roll_e_ladreg_1d`` over ``adj_lookback``;
    * the volatility is multiplied by that coefficient and ``_lapl_mult``;
    * the result is smoothed by combining ``filt.lrma`` passes of length 61, 7
      and 16;
    * the smoothed values are written back on the open rows and the remaining
      rows are filled by ``filt.fill``.

    Tickers with no open rows become all-NaN.  The adjusted frame is stored
    under the same key in ``cr_vol_all_adj``.

    :return: None.
    """
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')
    tick_cols = univ_ib_cl.tick_cols()
    for k in range(0, len(vol_list)):
        print('Processing volatility %s' % vol_names[k])
        for j in vol_lookbacks:
            print('Currently working on lookback %s' % str(j))
            # One key per lookback, shared by the read and the write.
            key = univ_ib_eqidx_ext + vol_names[k] + ('_0' if j < 100 else '_') + str(j)
            univ_ib_vol = cr_vol_all.retrieve(key)
            for i in tick_cols:
                z = np.where(univ_ib_gd[i].values.astype('int') == 1)[0]
                if z.shape[0] > 0:
                    univ_ib_vol_ = univ_ib_vol[i].values[z]
                    univ_ib_cl_ = univ_ib_cl[i].values[z]
                    # find the beta
                    univ_ib_vol__ = filt.lag(univ_ib_vol_)
                    univ_ib_cl_ = np.abs(filt.ret(univ_ib_cl_))
                    regp = qreg4.roll_e_ladreg_1d(univ_ib_cl_, univ_ib_vol__, adj_lookback)
                    # get the beta-adjusted volatility
                    vol__ = univ_ib_vol_ * regp * _lapl_mult
                    # smooth out the volatility
                    vol__1 = filt.lrma(vol__, 61, lg=True)
                    vol__2 = filt.lrma(vol__, 7, lg=True)
                    vol__3 = vol__1 + filt.lrma(vol__2 - vol__1, 16)
                    # push the new volatility back: blank the column, write the
                    # open rows, then fill the gaps
                    univ_ib_vol[i] = np.nan
                    univ_ib_vol[list(z), i] = vol__3
                    univ_ib_vol[i] = filt.fill(univ_ib_vol[i].values)
                else:
                    # market never open: nothing to adjust
                    univ_ib_vol[i] = np.nan
            cr_vol_all_adj.store(key, univ_ib_vol)
    return None
# NOTE(review): module-level script fragment.  `i` (the market passed to
# mkt_retrieve) is not defined in the visible code, and the last loop below is
# cut off after its first branch -- the selection logic that presumably
# followed is missing.
testv1 = mkt_retrieve(i, 'Stats', 'Volatility')
for vv in ['vol_gk240']:
    # vv = 'vol_gk240'
    # lag the volatility
    testv = filt.lag(testv1[vv].values)
    test1 = mkt_retrieve(i, 'Stats', 'Returns')

    # lag the signal
    # every signal column is lagged and divided by the lagged volatility
    test2 = mkt_retrieve(i, 'MovReg', 'Signals')
    tcl2 = test2.tick_cols()
    for k in tcl2:
        test2[k] = filt.lag(test2[k].values)/testv

    # get the average of returns
    # returns are divided by the same lagged volatility as the signals
    test1_ = test1[['Date', 'Close']]
    test1_['Returns'] = filt.ret(test1_['Close'].values)
    fret1 = test1_['Returns'].values / testv

    # Kendall tau between the scaled return and each scaled signal column
    num_col = len(tcl2)
    correl_vec = np.zeros(num_col)
    for z in range(0, num_col):
        correl_vec[z] = bst.kendall_tau(fret1, test2[tcl2[z]].values)

    # accumulators that are never filled in the visible part of the fragment
    _mom_sig = []
    _mom_sig_idx = []
    _mom_val = []
    for z in range(0, 50):
        # z = 0
        if z == 0:
            _idx = bst.argmax_fixed(correl_vec)
def check_closest_volatility():
    """Print regression coefficients of absolute returns on lagged volatility, overall and by trend sign.

    For every estimator name in ``vol_names`` and every lookback ``k`` in
    ``range(_min_range, _max_range + _step_range, _step_range)`` the stored
    frame ``<name>_<k>`` (``k`` below 100 carries a leading zero) is read from
    ``cr_vol_all``, lagged per ticker and flattened.  ``simple_quant_reg`` is
    then called on (absolute return, volatility * ``_norm_mult``) for seven
    samples, always restricted to non-NaN pairs:

    0. all observations;
    1-3. lagged 30/60/120-step ``filt.ret`` negative;
    4-6. lagged 30/60/120-step ``filt.ret`` positive.

    ``simple_quant_reg`` is presumably a median regression, judging by the
    ``QuantReg(...).fit(q=0.5)`` alternative the original kept in comments --
    confirm.  One tab-separated line with the seven results is printed per
    frame; nothing is returned.
    """
    import warnings

    univ_ib_cl = cr_cret.retrieve(univ_ib_ext+'Close')
    univ_ib_rt1 = univ_ib_cl.copy()
    univ_ib_rt2 = univ_ib_cl.copy()
    univ_ib_rt3 = univ_ib_cl.copy()

    for i in univ_ib_cl.tick_cols():
        univ_ib_cl[i] = filt.ret(univ_ib_cl[i].values)
        univ_ib_rt1[i] = filt.ret(univ_ib_rt1[i].values, 30)
        univ_ib_rt1[i] = filt.lag(univ_ib_rt1[i].values)
        univ_ib_rt2[i] = filt.ret(univ_ib_rt2[i].values, 60)
        univ_ib_rt2[i] = filt.lag(univ_ib_rt2[i].values)
        univ_ib_rt3[i] = filt.ret(univ_ib_rt3[i].values, 120)
        univ_ib_rt3[i] = filt.lag(univ_ib_rt3[i].values)
    univ_ib_cl_data = np.abs(univ_ib_cl[univ_ib_cl.tick_cols()].values.reshape(-1, ))

    # Restore the caller's warning filters afterwards instead of forcing
    # "default" globally.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', RuntimeWarning)
        univ_ib_rt1_data = np.sign(univ_ib_rt1[univ_ib_rt1.tick_cols()].values.reshape(-1, ))
        univ_ib_rt2_data = np.sign(univ_ib_rt2[univ_ib_rt2.tick_cols()].values.reshape(-1, ))
        univ_ib_rt3_data = np.sign(univ_ib_rt3[univ_ib_rt3.tick_cols()].values.reshape(-1, ))

    # Conditioning masks in printed column order; None means "no extra condition".
    conditions = (
        None,
        univ_ib_rt1_data < 0,
        univ_ib_rt2_data < 0,
        univ_ib_rt3_data < 0,
        univ_ib_rt1_data > 0,
        univ_ib_rt2_data > 0,
        univ_ib_rt3_data > 0,
    )
    cl_valid = ~np.isnan(univ_ib_cl_data)

    # vol_names_ = [i for i in vol_names if 'reg' not in i]

    for j in vol_names:
        for k in range(_min_range, _max_range+_step_range, _step_range):
            # Exactly one key per lookback: "_0<k>" below 100, "_<k>" otherwise.
            label = j + ('_0' if k < 100 else '_') + str(k)
            univ_ib_vol = cr_vol_all.retrieve(univ_ib_ext + label)
            for i in univ_ib_vol.tick_cols():
                univ_ib_vol[i] = filt.lag(univ_ib_vol[i].values)
            univ_ib_vol_data = univ_ib_vol[univ_ib_vol.tick_cols()].values.reshape(-1, )

            both_valid = cl_valid & ~np.isnan(univ_ib_vol_data)
            row = [label + '\t']
            for cond in conditions:
                univ_ib_nn = both_valid if cond is None else both_valid & cond
                univ_ib_vol_data_ = univ_ib_vol_data[univ_ib_nn]*_norm_mult
                univ_ib_cl_data_ = univ_ib_cl_data[univ_ib_nn]
                res = simple_quant_reg(univ_ib_cl_data_, univ_ib_vol_data_)
                row.extend((np_to_str(res), '\t'))
            print(*row)
def get_cs_factor_portfolios():
    """Back-test six cross-sectional signal portfolios and plot their cumulative curves.

    Inputs (all read through ``cr_cret`` / ``cr_sret`` with the
    ``univ_ib_eqidx_ext`` prefix): close prices (converted with ``filt.ret``),
    the ``vol_gk240`` volatility, the ``D10S26_521_QRB_LVL`` signal and the
    ``PCA1_Beta`` / ``PCA2_Beta`` frames.  Everything except the returns is
    lagged with ``filt.lag``.

    Per row from ``_ny`` onwards the signal divided by volatility is mapped
    cross-sectionally by ``half_norm_rankit`` (H portfolios) and
    ``full_norm_rankit`` (F portfolios).  Variant 1 uses the weights as they
    are, variant 2 removes the projection on the PCA1 beta, variant 3 removes
    the projections on both betas.  Weights are scaled to unit absolute sum
    and applied to returns levered by ``0.005 / vol``.

    For each portfolio the function prints ``[sharpe, drawdown ratio]`` and
    plots the cumulative curve.  Figure 1 holds H1-H3 and figure 2 holds
    F1-F3.  A portfolio whose Sharpe is not positive is reported and plotted
    with its sign flipped.
    """
    # just testing here
    # read the closing prices, convert it to returns
    _cls = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')
    tcl = _cls.tick_cols()
    for j in tcl:
        _cls[j] = filt.ret(_cls[j].values)

    # read the volatility, signal, pca beta, lag them
    _vol = cr_cret.retrieve(univ_ib_eqidx_ext + 'vol_gk240')
    _sig = cr_sret.retrieve(univ_ib_eqidx_ext + 'D10S26_521_QRB_LVL')
    # _sig = cr_sret.retrieve(univ_ib_eqidx_ext + _all_signals_p1_55[2])
    _pca1 = cr_cret.retrieve(univ_ib_eqidx_ext + 'PCA1_Beta')
    _pca2 = cr_cret.retrieve(univ_ib_eqidx_ext + 'PCA2_Beta')
    for j in tcl:
        _sig[j] = filt.lag(_sig[j].values)
        _pca1[j] = filt.lag(_pca1[j].values)
        _pca2[j] = filt.lag(_pca2[j].values)
        _vol[j] = filt.lag(_vol[j].values)

    # find starting point, at least 20 markets are live (from both beta, sig)
    nc = len(tcl)
    ny = np.zeros(nc)
    for j in range(0, nc):
        ny[j] = np.maximum(filt.fst_nan(_sig[tcl[j]].values), filt.fst_nan(_pca1[tcl[j]].values))
    _ny_mn = int(np.min(ny))
    _ny_mx = int(np.max(ny))
    _ny = _ny_mn
    # NOTE(review): this keeps the LAST row j for which at least 20 markets
    # have a start index >= j.  Whether that matches the "20 markets live"
    # intent depends on what filt.fst_nan returns -- confirm.
    for j in range(_ny_mn, _ny_mx):
        if np.where(ny >= j)[0].shape[0] >= 20:
            _ny = j

    # cross-sectionally normalize the signal
    _sign1 = _sig.copy()
    _sign2 = _sig.copy()
    _sign3 = _sig.copy()
    _sign4 = _sig.copy()
    _sign5 = _sig.copy()
    _sign6 = _sig.copy()
    for _frame in (_sign1, _sign2, _sign3, _sign4, _sign5, _sign6):
        for j in tcl:
            _frame[j] = np.nan
    for j in range(_ny, _sig.shape[0]):
        _tmp = _sig[j, tcl] / _vol[j, tcl]
        # 1-3 start from the half-norm weights, 4-6 from the full-norm weights
        _sign1[j, tcl] = half_norm_rankit(_tmp)
        _sign2[j, tcl] = _sign1[j, tcl].values
        _sign3[j, tcl] = _sign1[j, tcl].values
        _sign4[j, tcl] = full_norm_rankit(_tmp)
        _sign5[j, tcl] = _sign4[j, tcl].values
        _sign6[j, tcl] = _sign4[j, tcl].values

    # calculate returns using risk parity portfolio approaches
    bk_test = dummy_df(_vol)
    for _col in ('H1', 'H2', 'H3', 'F1', 'F2', 'F3'):
        bk_test[_col] = np.nan

    for j in range(_ny, _sig.shape[0]):
        _sign1_tmp = _sign1[j:j, tcl].values
        _sign2_tmp = _sign2[j:j, tcl].values
        _sign3_tmp = _sign3[j:j, tcl].values
        _sign4_tmp = _sign4[j:j, tcl].values
        _sign5_tmp = _sign5[j:j, tcl].values
        _sign6_tmp = _sign6[j:j, tcl].values
        _cls_tmp = _cls[j:j, tcl].values
        _vol_tmp = _vol[j:j, tcl].values
        _pca1_tmp = _pca1[j:j, tcl].values
        _pca2_tmp = _pca2[j:j, tcl].values

        _sign1_tmp, _sign2_tmp, _sign3_tmp, _sign4_tmp, _sign5_tmp, _sign6_tmp, _cls_tmp, _vol_tmp, _pca1_tmp,\
            _pca2_tmp = reduce_nonnan(_sign1_tmp, _sign2_tmp, _sign3_tmp, _sign4_tmp, _sign5_tmp, _sign6_tmp,
                                      _cls_tmp, _vol_tmp, _pca1_tmp, _pca2_tmp)

        # squared norms of the betas and projections of the raw weights on them
        _tmp11 = np.dot(_pca1_tmp, _pca1_tmp)
        _tmp22 = np.dot(_pca2_tmp, _pca2_tmp)
        _tmps11 = np.dot(_sign1_tmp, _pca1_tmp)
        _tmps12 = np.dot(_sign1_tmp, _pca2_tmp)
        _tmps21 = np.dot(_sign4_tmp, _pca1_tmp)
        _tmps22 = np.dot(_sign4_tmp, _pca2_tmp)

        # variant 2: remove the PCA1 component
        _sign2_tmp = _sign2_tmp - _pca1_tmp * _tmps11 / _tmp11
        _sign5_tmp = _sign5_tmp - _pca1_tmp * _tmps21 / _tmp11

        # variant 3: remove the PCA1 and PCA2 components
        _sign3_tmp = _sign3_tmp - _pca1_tmp * _tmps11 / _tmp11 - _pca2_tmp * _tmps12 / _tmp22
        _sign6_tmp = _sign6_tmp - _pca1_tmp * _tmps21 / _tmp11 - _pca2_tmp * _tmps22 / _tmp22

        # scale every weight vector to unit absolute sum
        _sign1_tmp = _sign1_tmp / np.sum(np.abs(_sign1_tmp))
        _sign2_tmp = _sign2_tmp / np.sum(np.abs(_sign2_tmp))
        _sign3_tmp = _sign3_tmp / np.sum(np.abs(_sign3_tmp))
        _sign4_tmp = _sign4_tmp / np.sum(np.abs(_sign4_tmp))
        _sign5_tmp = _sign5_tmp / np.sum(np.abs(_sign5_tmp))
        _sign6_tmp = _sign6_tmp / np.sum(np.abs(_sign6_tmp))

        _lev_tmp = 0.005 / _vol_tmp
        _ret_tmp = _cls_tmp * _lev_tmp

        bk_test[j, 'H1'] = np.dot(_sign1_tmp, _ret_tmp)
        bk_test[j, 'H2'] = np.dot(_sign2_tmp, _ret_tmp)
        bk_test[j, 'H3'] = np.dot(_sign3_tmp, _ret_tmp)
        bk_test[j, 'F1'] = np.dot(_sign4_tmp, _ret_tmp)
        bk_test[j, 'F2'] = np.dot(_sign5_tmp, _ret_tmp)
        bk_test[j, 'F3'] = np.dot(_sign6_tmp, _ret_tmp)

    _dt = bk_test['Date'].values[_ny:]
    for _fig_no, _cols in ((1, ['H1', 'H2', 'H3']), (2, ['F1', 'F2', 'F3'])):
        plt.figure(_fig_no)
        for jidx, j in enumerate(_cols):
            _sh = np.nanmean(bk_test[j].values)*16/np.nanstd(bk_test[j].values)
            if _sh > 0:
                _mx = avg_drawdown(bk_test[_ny:, j].values) / (16 * np.std(bk_test[_ny:, j].values))
                testh = conv_to_price(bk_test[j].values)
            else:
                # Non-positive Sharpe: report and plot the mirrored portfolio.
                # Without this branch every curve was negated unconditionally.
                _mx = avg_drawdown(-bk_test[_ny:, j].values) / (16 * np.std(bk_test[_ny:, j].values))
                testh = conv_to_price(-bk_test[j].values)
                _sh = -_sh
            print([_sh, _mx])
            plt.subplot(3, 1, jidx+1)
            plot_ts_new(_dt, testh[_ny:])
    _bk_test = bk_test[_ny:, bk_test.tick_cols()].values

    # NOTE(review): everything from here to `return x_` looks like fragments of
    # other code spliced into this function body.  It references names that are
    # not defined anywhere in the visible function (bk_test1, bk_test2,
    # bk_test3, i, test1, testv, _eq_idx, e2, x_), so executing it as written
    # would raise NameError at the first such reference.
    # bk_test3 = bk_test.copy()


    # Scratch comparison of three saved back-test frames on a 3x1 grid;
    # bk_test1/2/3 are presumably copies kept from earlier interactive runs.
    plt.subplot(3, 1, 1)
    tmp = bk_test1[:, 'F1'].values
    plot_ts_new(_dt, conv_to_price(tmp)[_ny:])
    plt.subplot(3, 1, 2)
    tmp = bk_test2[:, 'F1'].values
    plot_ts_new(_dt, conv_to_price(tmp)[_ny:])
    plt.subplot(3, 1, 3)
    tmp = 0.5 * -bk_test3[:, 'F1'].values + 0.5 * bk_test2[:, 'F2'].values
    plot_ts_new(_dt, conv_to_price(tmp)[_ny:])
    print(np.nanmean(tmp) * 16 / np.nanstd(tmp))
    # NOTE(review): second fragment -- here `j` is used as a lookback and `i`
    # as a market identifier, neither of which is set up for that in this
    # function.
    test2 = mkt_retrieve(i, 'MovReg', 'Signals_Pct_' + str(j))
    tcl2 = test2.tick_cols()
    for lmb in [100]:
        # lmb = 0.01
        print('Using lookback of %s with lambda of %s' % (str(j), str(lmb)))

        for k in range(2, 3):
            # k = 1
            # lag the signal data
            test2_ = test2.copy()
            for tcl2_ in tcl2:
                test2_[tcl2_] = filt.lag(test2_[tcl2_].values, k)

            # get the average of returns
            test1_ = test1[['Date', 'Close']]
            test1_['Returns'] = filt.ret(test1_['Close'].values)
            test1_['MultiReturns'] = filt.ret(test1_['Close'].values, k)/k
            test1_['MultiReturns'] = filt.mpc(test1_['MultiReturns'].values, j)
            test1_['ReturnPct'] = filt.mpc(test1_['Returns'].values, j)

            # get beta
            # beta1 = qreg5.roll_s_ladreg_2d_l2n(test1_['MultiReturns'].values,
            #                                    np.ascontiguousarray(test2_[test2_.tick_cols()].values),
            #                                    j, lmb, 30)
            # NOTE(review): unlike the commented-out roll_s call above, this
            # call passes no regressor matrix as its second argument; a line
            # may be missing -- confirm against the qreg5 signature.
            beta2 = qreg5.roll_w_ladreg_2d_l2n(test1_['MultiReturns'].values,
                                               j, lmb, 30)
            # beta1_ = test2.copy()
            beta2_ = test2.copy()
    # NOTE(review): third fragment -- merges the 'Changes7' and 'Changes9'
    # signal sets into test2 (columns suffixed _CH7 / _CH9), scales signals and
    # returns by `testv`, and trims both frames to their common start row.
    test2_ = mkt_retrieve(i, 'MovReg', 'Changes7')[['Date']+_sig_set_95_ch7]
    test2_.set_columns(['Date']+[k + '_CH7' for k in _sig_set_95_ch7])
    test2 = DataFrame.merge(test2, test2_, on='Date')

    test2_ = mkt_retrieve(i, 'MovReg', 'Changes9')[['Date']+_sig_set_95_ch9]
    test2_.set_columns(['Date']+[k + '_CH9' for k in _sig_set_95_ch9])
    test2 = DataFrame.merge(test2, test2_, on='Date')

    tcl2 = test2.tick_cols()
    for k in tcl2:
        test2[k] = filt.lag(test2[k].values)/testv

    # get the returns
    test1 = mkt_retrieve(i, 'Stats', 'Returns')[['Date', 'Close']]
    test1['Returns'] = filt.ret(test1['Close'].values)/testv
    del test1['Close']

    tcl2 = test2.tick_cols()

    # latest first-valid index across the return and all signal columns
    ny = filt.fst_nan(test1['Returns'].values)
    for k in tcl2:
        ny = np.maximum(ny, filt.fst_nan(test2[k].values))

    test1 = test1[ny:, :]
    test2 = test2[ny:, :]

    if i == _eq_idx[0]:
        test1_all = test1.copy()
        test2_all = test2.copy()
        # NOTE(review): the loop and `return x_` below look like the tail of
        # the `get_permut` helper called at module level further down; its
        # `def` line and the initialisation of x_ / e2 are not in this file
        # section.
        for j in range(i, e2):
            x_.append((i, j))
    return x_

# Grid search over filt.smth_price settings.  For every parameter combination
# whose sum is at most 250, the return autocorrelation g1 of the smoothed price
# is converted to an EMA-equivalent span k = int((1 + g1) / (1 - g1)), the
# inverse of the (k - 1) / (k + 1) target used in get_best_fit.  For each
# k > 5, `results[k]` keeps the combination with the lowest overshoot score as
# [n0, n1, f0, f1, oshpr, score].
# get_permut presumably returns (i, j) pairs -- confirm against its definition.
n_range = get_permut(5, 50, 50)
f_range = get_permut(5, 70, 70)

results = {}
for n in n_range:
    for f in f_range:
        for oshpr in range(4, 40, 1):
            _loss = n[0]+n[1]+f[0]+f[1]+oshpr
            if _loss <= 250:
                test1p = filt.smth_price(test3p_, test2p_, n[0], n[1], f[0], f[1], 0, oshpr)
                test1p, test2p, test3p = reduce_nonnan(test1p, test2p_, test3p_)
                g1 = autocorr(filt.ret(test1p)[-7000:])
                k = int((1+g1)/(1-g1))
                if k > 5:
                    # overshoot of the smoothed price beyond the band spanned by
                    # the two reference series, scaled by the parameter sum
                    xoc_mx = test1p - np.maximum(test2p, test3p)
                    xoc_mn = test1p - np.minimum(test2p, test3p)
                    good2_ = np.abs(200 * ((xoc_mx > 0) * xoc_mx + (xoc_mn < 0) * xoc_mn) / (test2p + test3p))
                    good2 = np.mean(good2_[-7000:]) * 2 / (k + 1) + np.median(good2_[-7000:]) * (k - 1) / (k + 1)
                    good2 *= np.sqrt(_loss)
                    if k in results:
                        # known span: replace only on a strictly better score
                        tmp = results[k]
                        if tmp[5] > good2:
                            results[k] = [n[0], n[1], f[0], f[1], oshpr, good2]
                    else:
                        # first candidate for this span; without this branch
                        # `results` was never populated
                        results[k] = [n[0], n[1], f[0], f[1], oshpr, good2]
                        print([k] + results[k])
def check_closest_volatility():
    """Print median-regression slopes of absolute returns on lagged volatility.

    For every estimator name in ``vol_names_sm`` and every look-back ``k`` in
    ``range(_min_range, _max_range, _step_range * 3)`` the stored volatility
    panel is lagged with ``filt.lag``, flattened, scaled by ``_norm_mult`` and
    used as the single regressor in a ``QuantReg(...).fit(q=0.5)`` fit of the
    pooled absolute ``filt.ret`` of the close panel.

    Seven fits are made per panel, in this order: all usable observations,
    then observations where the lagged ``filt.ret(close, n)`` is negative for
    n = 30, 60, 120, then where it is positive for n = 30, 60, 120.

    Returns:
        None.  One line per (estimator, look-back) is printed.
    """
    import warnings

    univ_ib_cl = cr_cret.retrieve(univ_ib_ext + "Close")
    univ_ib_rt1 = univ_ib_cl.copy()
    univ_ib_rt2 = univ_ib_cl.copy()
    univ_ib_rt3 = univ_ib_cl.copy()
    trend_frames = ((univ_ib_rt1, 30), (univ_ib_rt2, 60), (univ_ib_rt3, 120))

    for i in univ_ib_cl.tick_cols():
        univ_ib_cl[i] = filt.ret(univ_ib_cl[i].values)
        for frame, span in trend_frames:
            # Lagged so the conditioning value is shifted relative to the
            # return being explained.
            frame[i] = filt.ret(frame[i].values, span)
            frame[i] = filt.lag(frame[i].values)

    def _flat(frame):
        # All ticker columns of `frame` as a single 1-D array.
        return frame[frame.tick_cols()].values.reshape(-1)

    univ_ib_cl_data = np.abs(_flat(univ_ib_cl))

    # catch_warnings restores whatever filters were active before, instead of
    # forcing the RuntimeWarning filter to "default" afterwards.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        trend_signs = [np.sign(_flat(frame)) for frame, _ in trend_frames]
        # None means "no extra condition"; the masks do not depend on the
        # volatility panel, so they are built once.
        conditions = (
            [None]
            + [sgn < 0 for sgn in trend_signs]
            + [sgn > 0 for sgn in trend_signs]
        )

    def _median_slope(vol_flat, keep):
        # Median regression of |return| on scaled volatility over `keep`.
        return QuantReg(univ_ib_cl_data[keep], vol_flat[keep] * _norm_mult).fit(q=0.5).params

    # vol_names_ = [i for i in vol_names if 'reg' not in i]

    for j in vol_names_sm:
        for k in range(_min_range, _max_range, _step_range * 3):
            # Look-backs below 100 carry one leading zero in the stored name.
            # Previously both retrieves sat under `if k < 100`, so the padded
            # load was overwritten and nothing was loaded for k >= 100.
            tag = j + ("_0" if k < 100 else "_") + str(k)
            univ_ib_vol = cr_vol_all.retrieve(univ_ib_ext + tag + "D")
            for i in univ_ib_vol.tick_cols():
                univ_ib_vol[i] = filt.lag(univ_ib_vol[i].values)
            vol_flat = _flat(univ_ib_vol)

            usable = ~np.isnan(univ_ib_cl_data) & ~np.isnan(vol_flat)
            slopes = [
                _median_slope(vol_flat, usable if cond is None else usable & cond)
                for cond in conditions
            ]

            # NOTE(review): the original reporting call was missing (only the
            # label expressions remained as no-op statements); the exact
            # output layout here is an assumption - confirm.
            print(tag + "\t", *slopes)
                # f1 = 200
                for f2 in range(6, 36, 2): # range(2, 36, 2):
                    # f2 = 28
                    for lthresh in range(0, 2, 1): #[1]:
                        # lthresh = 1
                        for oshpr in range(13, 16, 3):
                            # oshpr = 28
                            if (np.isnan(good2b) | (~np.isnan(good2b) & (good2b > 0.0001))) & (n1+n2+f1+f2 < 180):
                                # print(n1, n2, f1, f2, lthresh, oshpr)
                                test1p = spec_smth(univ_ib_op[_ticker].values, univ_ib_cl[_ticker].values, n1, n2, f1, f2, lthresh, oshpr)
                                test2p = univ_ib_cl[_ticker].values
                                test3p = univ_ib_op[_ticker].values
                                testkp = np.copy(testk)
                                test1p, test2p, test3p, testkp = reduce_nonnan(test1p, test2p, test3p, testkp)

                                test1r = filt.ret(test1p)
                                testkr = filt.ret(testkp)
                                test1r, testkr = reduce_nonnan(test1r, testkr)

                                # good1 = autocorr(test1r)-autocorr(test2r)
                                good1 = autocorr(test1r[-7000:])
                                good1k = autocorr(testkr[-7000:])

                                # get the overshoot measure
                                xoc_mx = test1p-np.maximum(test2p, test3p)
                                xoc_mn = test1p-np.minimum(test2p, test3p)
                                good2_ = np.abs(200*((xoc_mx>0)*xoc_mx+(xoc_mn<0)*xoc_mn)/(test2p+test3p))
                                # good2 = np.mean(good2_[-7000:])
                                good2 = np.median(good2_[-7000:])

                                xoc_mxk = testkp-np.maximum(test2p, test3p)
def test_measures2():
    # For each transform name in _trn_names, fills a table (dummy_df) with
    # 100 x the Spearman rank correlation between a 2-step lagged signal and
    # the return series, pooled over the tickers in tick_cols.  Rows are
    # indexed by position in _hoz_names; there is one column per
    # (_prc_names, _fil_names) pair.
    # NOTE(review): several loop/branch bodies are missing in this function
    # (marked below), so it does not parse as it stands.
    pd.set_option('display.max_columns', 30)
    pd.set_option('display.max_rows', 100)
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')
    _prc_names = ['CLS', 'A2D', 'A3D', 'A4D', 'A2W', 'A3W', 'A4W', 'A2M', 'A3M', 'A4M']
    _trn_names = ['LVL', 'CH0', 'CH1', 'CH2']
    _fil_names = ['LRB', 'QRB', 'QRG']
    _hoz_names = []
    # NOTE(review): the branch bodies that should append to _hoz_names are
    # missing.  The j < 10 / j < 100 split suggests zero-padded horizon
    # strings, but the exact format (used in the retrieval key built below)
    # cannot be determined from here - restore from version control.
    for j in range(5, 305, 5):
        if j < 10:
        elif j < 100:

    # Replace each close column by filt.ret computed only over rows where
    # ExchOpen == 1; all other rows are first set to NaN and then passed
    # through filt.fill1(..., 0).
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')
    tick_cols = univ_ib_cl.tick_cols()
    for n in tick_cols:
        z = list(np.where(univ_ib_gd[n].values.astype('int') == 1)[0])
        univ_ib_cl_ = filt.ret(univ_ib_cl[n].values[z])
        univ_ib_cl[n] = np.nan
        univ_ib_cl[z, n] = univ_ib_cl_
        univ_ib_cl[n] = filt.fill1(univ_ib_cl[n].values, 0)

    # From here on tick_cols is a hard-coded list; siz_ holds one row-index
    # threshold per ticker, in the same order.
    # tick_cols_ = tick_cols[0:1]
    tick_cols = ['SP500', 'SP400Mid', 'Nikkei225', 'R2000', 'FTSE100', 'DAX',
                 'SMI', 'CAC40', 'AEX', 'MIBFTSE', 'IBEX35', 'OMXS30']
    siz_ = [5330, 7115, 6757, 7349, 5324, 6785, 6794, 6199, 7232, 7936, 7180, 7295]
    # siz = [2784, 999, 1357, 765, 2790, 1329, 1320, 1915, 882, 178, 934, 819]

    # Drop the first ticker ('SP500') and its threshold.
    tick_cols = tick_cols[1:]
    siz_ = siz_[1:]

    for k in _trn_names:
        dummy_cols = []
        # NOTE(review): the inner loop body that should append one label per
        # (i, j) pair to dummy_cols is missing.  10 x 3 = 30 labels are
        # needed by the indexing below.
        for i in _prc_names:
            for j in _fil_names:
        # Multiplying by NaN yields all-NaN arrays of length len(_hoz_names).
        x_ = np.random.random(len(_hoz_names))*np.nan
        y_ = np.random.random(len(_hoz_names))*np.nan
        dummy_df = DataFrame({dummy_cols[0]: x_, dummy_cols[1]: y_})
        # Remaining 28 columns, for 30 in total.
        for i in range(2, 30):
            dummy_df[dummy_cols[i]] = x_

        # _col walks dummy_cols in the same (i, j) order as the loops below.
        _col = 0
        for i in _prc_names:
            # j = _fil_names[0]
            for j in _fil_names:
                # k = _trn_names[0]
                for mi, m in enumerate(_hoz_names):
                    # m = _hoz_names[0]
                    # Retrieval key: <ext><price>_<filter>_<transform>_<horizon>
                    fn = univ_ib_eqidx_ext+i+'_'+j+'_'+k+'_'+m
                    univ_ib_sig = cr_sig_mr_sg.retrieve(fn)
                    # Pooled return / signal samples across all tickers.
                    ret_, sig_ = np.empty(0), np.empty(0)
                    for ni, n in enumerate(tick_cols):
                        # ni = 0
                        # n = tick_cols[ni]
                        # Rows where ExchOpen == 1 and whose index exceeds
                        # this ticker's threshold in siz_.
                        z = list(np.where(univ_ib_gd[n].values.astype('int') == 1)[0])
                        z = [z_ for z_ in z if z_ > siz_[ni]]   # 5 is just a buffer
                        sig__ = univ_ib_sig[n].values[z]
                        ret__ = univ_ib_cl[n].values[z]
                        # Lag the signal by 2 and drop the first two entries
                        # of both series before pooling.
                        sig__ = filt.lag(sig__, 2)
                        ret_ = np.hstack((ret_, ret__[2:]))
                        sig_ = np.hstack((sig_, sig__[2:]))
                    # Correlate only where both series are non-NaN.
                    gidx = np.where(~np.isnan(ret_) & ~np.isnan(sig_))[0]
                    dummy_df[mi, dummy_cols[_col]] = 100*spearmanr(ret_[gidx], sig_[gidx]).correlation
                _col += 1
        univ_ib_vol6 = cr_vol_all.retrieve(univ_ib_ext+'vol_gk_'+lookback)[['Date', ticker]]
        univ_ib_vol7 = cr_vol_all.retrieve(univ_ib_ext+'vol_yz_'+lookback)[['Date', ticker]]

        z = list(np.where(univ_ib_gd[ticker].values.astype('int') == 1)[0])

        univ_ib_cl = univ_ib_cl[z, :]
        univ_ib_vol0 = univ_ib_vol0[z, :]
        univ_ib_vol1 = univ_ib_vol1[z, :]
        univ_ib_vol2 = univ_ib_vol2[z, :]
        univ_ib_vol3 = univ_ib_vol3[z, :]
        univ_ib_vol4 = univ_ib_vol4[z, :]
        univ_ib_vol5 = univ_ib_vol5[z, :]
        univ_ib_vol6 = univ_ib_vol6[z, :]
        univ_ib_vol7 = univ_ib_vol7[z, :]

        univ_ib_cl[ticker] = np.abs(filt.ret(univ_ib_cl[ticker].values))
        univ_ib_vol0[ticker] = filt.lag(univ_ib_vol0[ticker].values)
        univ_ib_vol1[ticker] = filt.lag(univ_ib_vol1[ticker].values)
        univ_ib_vol2[ticker] = filt.lag(univ_ib_vol2[ticker].values)
        univ_ib_vol3[ticker] = filt.lag(univ_ib_vol3[ticker].values)
        univ_ib_vol4[ticker] = filt.lag(univ_ib_vol4[ticker].values)
        univ_ib_vol5[ticker] = filt.lag(univ_ib_vol5[ticker].values)
        univ_ib_vol6[ticker] = filt.lag(univ_ib_vol6[ticker].values)
        univ_ib_vol7[ticker] = filt.lag(univ_ib_vol7[ticker].values)

        univ_ib_dt_ = univ_ib_cl['Date'].values
        univ_ib_cl_ = univ_ib_cl[ticker].values
        univ_ib_vol0_ = univ_ib_vol0[ticker].values
        univ_ib_vol1_ = univ_ib_vol1[ticker].values
        univ_ib_vol2_ = univ_ib_vol2[ticker].values
        univ_ib_vol3_ = univ_ib_vol3[ticker].values
def check_price_forecast():
    """Compare the stored 'AdjSer' series for SP500 against its close.

    Loads every ``AdjSerD<dd>S<ss>`` series (days 1..10, smoothing 5..32, both
    zero-padded to two digits) on rows where ``ExchOpen`` is 1, prints
    ``smart_kendall`` between each series and the sign of the ``nlag``-step
    ``filt.chg`` of the close, and then plots one selected series together
    with cumulative products built from its ``filt.ret``.

    Returns:
        None.  Output is printed and plotted.
    """
    exch_open = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')
    univ_ib_gd = exch_open['SP500'].values
    z = np.where(univ_ib_gd.astype('int') == 1)[0]
    univ_ib_dt = exch_open['Date'].values[z]
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')['SP500'].values[z]
    univ_ib_vl = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')['SP500'].values[z]

    nlag = 2

    # n1 is the largest filt.fst_nan over every input and is used as the
    # common start offset of all trimmed arrays below.
    n1 = filt.fst_nan(univ_ib_cl)
    n1 = np.maximum(n1, filt.fst_nan(univ_ib_vl))

    names = []
    series = []
    for days in range(1, 11):
        for smth in range(5, 33):
            # Two-digit zero padding reproduces the D0x/Dxx and S0x/Sxx key
            # variants.  Previously the unpadded retrieve always overwrote
            # the padded one because the else branches were missing.
            tag = 'D%02dS%02d' % (days, smth)
            tmp = cr_aret.retrieve(univ_ib_eqidx_ext + 'AdjSer' + tag)['SP500'].values[z]
            # get the change in the signal
            # tmp = np.sign(filt.chg(tmp))

            # lag the data
            # tmp = filt.lag(tmp, nlag)

            series.append(tmp)
            # NOTE(review): the original label text was missing; the key
            # suffix is used as the label - confirm.
            names.append(tag)
            n1 = np.maximum(n1, filt.fst_nan(tmp))

    # One row per (days, smth) pair, in loop order.  Previously rows were
    # only stacked on the first pass, and then with the series itself.
    univ_ib_sig = np.vstack(series)

    univ_ib_cl_ = np.sign(filt.chg(univ_ib_cl, nlag))

    univ_ib_cl_ = univ_ib_cl_[n1:]
    univ_ib_sig = univ_ib_sig[:, n1:]

    for label, row in zip(names, univ_ib_sig):
        print(label, ':', smart_kendall(univ_ib_cl_, row))

    # Row 167 is days=6, smth=32 given the loop order above (28 rows per day).
    pick = univ_ib_sig[167, :]

    plot_ts_new(univ_ib_dt, univ_ib_cl)
    # The signal rows were trimmed by n1, so dates must be trimmed too.
    plot_ts_new(univ_ib_dt[n1:], pick)

    t1 = filt.ret(filt.lag(pick))
    t1_ = filt.ret(pick)
    # Trim the close returns by n1 as well so they align with t1; identical
    # to the untrimmed series when n1 is 0.
    t2 = filt.ret(univ_ib_cl)[n1:]
    t3 = t2*np.sign(t1)

    t1_, t2, t3 = reduce_nonnan(t1_, t2, t3)
    ct1_ = np.cumprod(1+t1_)*100
    ct2 = np.cumprod(1+t2)*100
    ct3 = np.cumprod(1-t3)*100

    # NOTE(review): the three panels were created empty in the original;
    # drawing the three cumulative series in this order is an assumption.
    pyl.figure(1)
    pyl.subplot(3, 1, 1)
    pyl.plot(ct1_)
    pyl.subplot(3, 1, 2)
    pyl.plot(ct2)
    pyl.subplot(3, 1, 3)
    pyl.plot(ct3)
# Look-back labels (as strings) for the volatility estimators.
lookbacks = ['120', '240', '360']

np_nice_options(linelen=250, numpres=8)

# Scaling constants.  sqrt(2/pi) is E|X|/sigma for a normal variable and
# 1/sqrt(2) is the same ratio for a Laplace variable - presumably what the
# names refer to.
# NOTE(review): the origin of 0.60 for _rand_mult is not visible here.
_norm_mult = np.sqrt(2/np.pi)
_lapl_mult = 1/np.sqrt(2)
_rand_mult = 0.60

tickers = ['SP500', 'DAX', 'Nikkei225', 'ESTX50', 'SMI', 'RDX', 'MSCIEM']

# Single-market experiment: close and the 'vol_pb240' volatility column.
i = 'SP500'
univ_ib_cl = mkt_retrieve(i, 'Stats', 'Returns')['Close'].values
univ_ib_vl = mkt_retrieve(i, 'Stats', 'Volatility')['vol_pb240'].values

# Target is the absolute filt.ret of the close; the regressor is the
# volatility passed through filt.lag.
univ_ib_cl = np.abs(filt.ret(univ_ib_cl))
univ_ib_vl = filt.lag(univ_ib_vl)

univ_ib_cl, univ_ib_vl = reduce_nonnan(univ_ib_cl, univ_ib_vl )

# In-place scaling of the volatility by the Laplace constant.
univ_ib_vl *= _lapl_mult
# Second regressor: sqrt of the scaled volatility, multiplied by the ratio
# med_abs_dev(univ_ib_vl) / med_abs_dev(sqrt series).
univ_ib_vl2 = np.sqrt(univ_ib_vl)
univ_ib_vl2 = univ_ib_vl2 * med_abs_dev(univ_ib_vl)/med_abs_dev(univ_ib_vl2)

# Rolling regression coefficients; 240 is presumably the window length -
# TODO confirm against qreg.
b41 = qreg.roll_e_ladreg_1d(univ_ib_cl, univ_ib_vl, 240)

b51 = qreg.roll_e_ladreg_2d(univ_ib_cl, mcc(univ_ib_vl, univ_ib_vl2), 240)

# Residuals of three predictors: raw scaled volatility, the one-regressor
# fit, and the two-regressor fit.  The fitted coefficients are passed
# through filt.lag before being applied.
resid0 = univ_ib_cl - univ_ib_vl
resid1 = univ_ib_cl - univ_ib_vl * filt.lag(b41)
resid2 = univ_ib_cl - univ_ib_vl * filt.lag(np.ascontiguousarray(b51[:, 0])) - univ_ib_vl2 * filt.lag(np.ascontiguousarray(b51[:, 1]))