Пример #1
0
Файл: ean.py Проект: tt9024/huan
def get_mu(x0,wt_decay=None,std_th=3,bootstrap=True,dn='norm',smooth_beta=None) :
    """
    Estimate a location and a spread for x0 after outlier treatment.

    Steps:
      1. Build sample weights: uniform when wt_decay is None, otherwise
         l1d.getwt(n, wt_decay); the weights are normalised to sum to 1.
      2. Compute the weighted mean and weighted standard deviation of x0 and
         hand them to outlier() with in_th=std_th and out_th=4*std_th.
      3. Optionally pass the result through tanh_smooth() using its own
         (unweighted) mean and standard deviation.
      4. Optionally bootstrap: draw n*3//2 resamples of n*2//3 points each
         (with probabilities wt) and replace x by the resample means.

    To be improved by maximum likelihood.

    Parameters:
      x0          : 1-d numpy array (it is copied, never modified here).
      wt_decay    : decay passed to l1d.getwt, or None for equal weights.
      std_th      : threshold, in weighted standard deviations, for outlier().
      bootstrap   : whether to run step 4.
      dn          : forwarded to f3() as `dn`.
      smooth_beta : `beta` for tanh_smooth(), or None to skip step 3.

    Returns:
      (f3(x, dn=dn), np.std(x)) where x is the treated sample, or the vector
      of bootstrap means when bootstrap is True.
    """
    x = x0.copy()
    n = len(x)
    if wt_decay is not None:
        wt = l1d.getwt(n, wt_decay)
    else:
        wt = np.ones(n).astype(float)
    wt /= np.sum(wt)

    xm = np.dot(x, wt)
    xs = np.sqrt(np.dot((x - xm)**2, wt))
    x = outlier(x, xm, xs, in_th=std_th, out_th=std_th*4)

    if smooth_beta is not None:
        x = tanh_smooth(x, x.mean(), x.std(), beta=smooth_beta)

    if bootstrap:
        # Explicit floor division: np.random.choice needs an integer sample
        # size, and plain `/` only yields one under Python 2 integer division.
        try_cnt = n * 3 // 2
        smp_cnt = n * 2 // 3
        x = np.array([np.mean(x[np.random.choice(n, smp_cnt, p=wt)])
                      for _ in range(try_cnt)])

    return f3(x, dn=dn), np.std(x)
Пример #2
0
def shp_cm(wb5s, yh, wt_decay):
    """
    Weighted mean over weighted standard deviation of a signed cumulative sum.

    wb5s.T is multiplied by sign(yh), cumulatively summed along its first
    axis and transposed back.  Rows of the result are then weighted with
    l1_reader.getwt(rows, wt_decay), normalised to sum to 1, and the ratio
    weighted-mean / weighted-std is returned for every column.
    """
    signed_cum = np.cumsum(wb5s.T * np.sign(yh), axis=0).T
    row_wt = l1_reader.getwt(signed_cum.shape[0], wt_decay)
    row_wt /= np.sum(row_wt)

    col_mean = np.dot(signed_cum.T, row_wt)
    sq_dev = (signed_cum - col_mean).T**2
    col_std = np.sqrt(np.dot(sq_dev, row_wt))
    return col_mean / col_std
Пример #3
0
Файл: sim.py Проект: tt9024/huan
def getYh(Xtrain,
          Ytrain,
          Xtest,
          clf,
          wt_decay=None,
          feat_select=True,
          fit_sign=False,
          wt_neg=False):
    """
    Fit a copy of `clf` on the training data and predict for Xtest.

    Parameters:
      Xtrain, Ytrain : 2-d feature array and 1-d target array.
      Xtest          : 2-d feature array with the same columns as Xtrain.
      clf            : estimator exposing fit(X, y, sample_weight=...) and
                       predict (or predict_proba when fit_sign is True).
                       It is deep-copied; the caller's object is not fitted.
      wt_decay       : if not None, sample weights come from
                       l1_reader.getwt(len(Ytrain), wt_decay).
      feat_select    : if True, columns are chosen by feat_sel(Xtrain, Ytrain)
                       and column 0 is always prepended; otherwise all
                       columns are used.
      fit_sign       : if True, rows with Ytrain == 0 are dropped, the model
                       is fitted on sign(Ytrain), existing weights are scaled
                       by sqrt(|Ytrain|), and the prediction is the last
                       predict_proba column minus the first.
      wt_neg         : if True, rows whose fitted target is negative get their
                       weight multiplied by 1 + sqrt(|target|).

    Returns:
      (yh, omp, ix): predictions for Xtest, the fitted copy of clf, and the
      column indices used.  When feature selection picks nothing, a message is
      printed and (constant Ytrain.mean() prediction, unfitted copy, [0]) is
      returned.
    """
    omp = copy.deepcopy(clf)
    _, k = Xtrain.shape
    if feat_select:
        ix = feat_sel(Xtrain, Ytrain)
        if len(ix) == 0:
            # Parenthesised single-string form prints identically on
            # Python 2 and is valid syntax on Python 3.
            print('nothing got selected!')
            return np.ones(Xtest.shape[0]) * Ytrain.mean(), omp, [0]
        ix = np.r_[0, ix]
    else:
        ix = np.arange(k)

    wt = None
    if wt_decay is not None:
        wt = l1_reader.getwt(len(Ytrain), wt_decay)

    if fit_sign:
        # remove zero cases: they carry no sign information
        ixz = np.nonzero(Ytrain != 0)[0]
        Xtrain = Xtrain[ixz, :]
        Ytrain0 = np.sign(Ytrain[ixz])
        if wt is not None:
            wt = wt[ixz] * np.sqrt(np.abs(Ytrain[ixz]))
    else:
        Ytrain0 = Ytrain

    if wt_neg:
        ixn = np.nonzero(Ytrain0 < 0)[0]
        if len(ixn) > 0:
            if wt is None:
                wt = np.ones(len(Ytrain0))
            wt[ixn] *= (1 + np.sqrt(np.abs(Ytrain0[ixn])))

    omp.fit(Xtrain[:, ix], Ytrain0, sample_weight=wt)

    if fit_sign:
        yp = omp.predict_proba(Xtest[:, ix])
        yh = yp[:, -1] - yp[:, 0]
    else:
        yh = omp.predict(Xtest[:, ix])
    return yh, omp, ix.copy()
Пример #4
0
Файл: sim.py Проект: tt9024/huan
def getYTrain(wb5m, y0=-42, y1=-30, wt_decay=0.1, fd=None):
    """
    Build the training target and its weighted mean / standard deviation.

    The raw target is a copy of `fd` when given, otherwise the sum of
    wb5m[:, y0:y1, 1] along axis 1.  Weights come from
    l1_reader.getwt(len(target), wt_decay) and are normalised to sum to 1.

    Returns (target after ean.outlier(..., in_th=1, out_th=3), mu, sd).
    """
    if fd is None:
        raw = np.sum(wb5m[:, y0:y1, 1], axis=1)
    else:
        raw = fd.copy()

    weights = l1_reader.getwt(len(raw), wt_decay)
    weights /= np.sum(weights)

    mu = np.dot(raw, weights)
    sq_dev = (raw - mu)**2
    sd = np.sqrt(np.dot(weights, sq_dev))

    treated = ean.outlier(raw, mu, sd, in_th=1, out_th=3)
    return treated, mu, sd
Пример #5
0
Файл: sim.py Проект: tt9024/huan
def eval_latest_avg(x, lb=16, k=3, step=2, wt_decay=0.1):
    """
    Weighted average of the means of the k most recent length-lb windows.

    Window j (j = 0..k-1) covers x[n - j*step - lb : n - j*step], so window 0
    is the one ending at the last sample.  With more than one window the
    weights are l1_reader.getwt(k, wt_decay) reversed and normalised to sum
    to 1; with a single window the weight is 1.
    """
    n = len(x)
    window_ends = n - np.arange(k) * step
    window_means = [np.mean(x[end - lb:end]) for end in window_ends]

    if len(window_means) <= 1:
        weights = np.array([1])
    else:
        weights = l1_reader.getwt(k, wt_decay)[::-1]
        weights /= np.sum(weights)
    return np.dot(window_means, weights)
Пример #6
0
Файл: sim.py Проект: tt9024/huan
def wt_corr2d(X2d, y, wt_decay=0.1, whiten=True):
    """
    Apply wt_corr() to every column of X2d against y with shared weights.

    The weights are l1_reader.getwt(n, wt_decay) normalised to sum to 1.
    Returns two arrays of length k (the number of columns): the first and the
    second value returned by wt_corr for each column.
    """
    n, k = X2d.shape
    assert n == len(y), 'X2d and y shape mismatch'
    wt = l1_reader.getwt(n, wt_decay)
    wt /= np.sum(wt)

    per_col = [wt_corr(X2d[:, col], y, wt=wt, whiten=whiten)
               for col in range(k)]
    mcorr = np.array([mc for mc, _ in per_col])
    mcstd = np.array([ms for _, ms in per_col])
    return mcorr, mcstd
Пример #7
0
Файл: sim.py Проект: tt9024/huan
def score_sign_insample(clf,
                        X0,
                        Y0,
                        wt_train=0.5,
                        lb=16,
                        k=3,
                        step=2,
                        wt_decay=0.1):
    """
    In-sample sign score of a copy of `clf` fitted on (X0, Y0).

    A deep copy of clf is fitted with weights l1_reader.getwt(n, wt_train)
    (normalised to sum to 1).  It then predicts the last lb + k*step rows of
    X0; the matching targets are multiplied by the sign of the prediction and
    summarised by eval_latest_avg().

    Returns (score, fitted copy of clf).
    """
    model = copy.deepcopy(clf)
    n_rows, _ = X0.shape

    fit_wt = l1_reader.getwt(n_rows, wt_train)
    fit_wt /= np.sum(fit_wt)
    model.fit(X0, Y0, fit_wt)

    tail = -lb - k * step
    pred = model.predict(X0[tail:, :])
    signed_y = Y0[tail:] * np.sign(pred)
    score = eval_latest_avg(signed_y, lb=lb, k=k, step=step, wt_decay=wt_decay)
    return score, model
Пример #8
0
Файл: sim.py Проект: tt9024/huan
def wt_corr(x1d, y, wt=None, wt_decay=0.1, whiten=True):
    """
    Weighted mean of the product x*y and the weighted spread around it.

    Parameters:
      x1d, y   : 1-d sequences of equal length; they are copied (as float)
                 and never modified.
      wt       : per-sample weights.  When None they are taken from
                 l1_reader.getwt(n, wt_decay) and normalised to sum to 1;
                 weights passed in are used as given.
      wt_decay : decay used only when wt is None.
      whiten   : if True, x and y are each centred on their plain
                 (unweighted) mean and divided by their plain standard
                 deviation, unless that deviation is <= 1e-10.

    Returns:
      (mc, ms) with mc = sum(wt * x * y) and
      ms = sqrt(sum(wt * (x * y - mc)**2)).

    Raises:
      AssertionError if the lengths of x1d and y differ.
    """
    n = len(x1d)
    assert n == len(y), 'x1d and y length mismatch'
    if wt is None:
        wt = l1_reader.getwt(n, wt_decay)
        wt /= np.sum(wt)

    # Float copies: the in-place whitening below would fail on integer input.
    x0 = np.array(x1d, dtype=float)
    y0 = np.array(y, dtype=float)
    if whiten:
        x0 -= x0.mean()
        sd = x0.std()
        if sd > 1e-10:
            x0 /= sd
        y0 -= y0.mean()
        sd = y0.std()
        if sd > 1e-10:
            y0 /= sd

    # Element-wise weighting gives the same numbers as multiplying by
    # np.diag(wt) without materialising an n-by-n matrix.
    mc = np.dot(x0, wt * y0)
    ms0 = x0 * y0 - mc
    ms = np.sqrt(np.dot(ms0, wt * ms0))
    return mc, ms
Пример #9
0
Файл: sim.py Проект: tt9024/huan
def feat_sel3(X,
              y,
              st_smp_cnt=16,
              lt_smp_cnt=200,
              wt_decay=0.25,
              wt_train=0.5,
              clf=None,
              cor100=0.05,
              vbose=False):
    """
    Greedy forward feature selection with backward elimination.

    Candidate screening:
      * wt_corr2d gives a decayed-weight correlation `mc` per column over the
        whole sample, and correlations over the last lt_smp_cnt, mt_smp_cnt
        (the integer midpoint of the other two) and st_smp_cnt rows, each
        requested with wt_decay=0.
      * A column stays a candidate only if `mc` has the same sign as all
        three recent-window correlations and, when cor100 > 0, if
        abs(mc) * sqrt(n) / 10 > cor100.

    Search:
      * The selection starts as [0]; column 0 is always kept.
      * Each iteration scores every remaining candidate added to the current
        selection.  The score is the sum of score_sign() over the three
        hold-out lengths (train on all but the last `ct` rows, test on the
        last `ct` rows).
      * The best candidate is accepted only if it improves the best score by
        more than 1e-10; otherwise the search stops.
      * After an acceptance, previously selected columns (never column 0) are
        dropped one at a time as long as dropping does not lower the score.
      * At most 100 iterations are run.

    Parameters:
      X, y       : (n, m) feature array and length-n target.
      st_smp_cnt : short hold-out length.
      lt_smp_cnt : long hold-out length; must satisfy
                   n > lt_smp_cnt >= st_smp_cnt.
      wt_decay   : decay for the whole-sample correlation.
      wt_train   : decay for l1_reader.getwt training weights, or None to
                   pass wt=None to score_sign.
      clf        : estimator handed to score_sign; defaults to
                   linear_model.RidgeCV().
      cor100     : correlation magnitude threshold (<= 0 disables it).
      vbose      : print progress.

    Returns:
      Array of selected column indices without the leading 0, or the list [7]
      when there are no candidates or nothing is selected.

    Raises:
      AssertionError on inconsistent sizes.
    """
    def _say(*parts):
        # One output line with items joined by single spaces: the same text
        # the Python 2 print statement produced, in a form that is also
        # valid on Python 3.
        print(' '.join([str(p) for p in parts]))

    n, _ = X.shape
    assert n > lt_smp_cnt and lt_smp_cnt >= st_smp_cnt, 'len error'
    assert len(y) == n, 'X,y shape error'
    # Floor division keeps this an integer so it can be used as a slice bound
    # even when true division is in effect.
    mt_smp_cnt = (lt_smp_cnt + st_smp_cnt) // 2

    mc, ms = wt_corr2d(X, y, wt_decay=wt_decay, whiten=True)
    mclt, mslt = wt_corr2d(X[-lt_smp_cnt:, :],
                           y[-lt_smp_cnt:],
                           wt_decay=0,
                           whiten=True)
    mcmt, msmt = wt_corr2d(X[-mt_smp_cnt:, :],
                           y[-mt_smp_cnt:],
                           wt_decay=0,
                           whiten=True)
    mcst, msst = wt_corr2d(X[-st_smp_cnt:, :],
                           y[-st_smp_cnt:],
                           wt_decay=0,
                           whiten=True)
    ixst = np.nonzero(mc * mcst > 0)[0]
    ixmt = np.nonzero(mc * mcmt > 0)[0]
    ixlt = np.nonzero(mc * mclt > 0)[0]
    ixa = np.intersect1d(np.intersect1d(ixst, ixmt), ixlt)  # candidate features
    ixcor = None
    if cor100 > 0:
        ixcor = np.nonzero(np.abs(mc) * np.sqrt(n) / 10 > cor100)[0]
        ixa = np.intersect1d(ixa, ixcor)
    if len(ixa) == 0:
        # nothing to be chosen
        return [7]
    if vbose:
        _say('ixst: ', len(ixst), ixst)
        _say('ixmt: ', len(ixmt), ixmt)
        _say('ixlt: ', len(ixlt), ixlt)
        # ixcor only exists when the magnitude filter ran.
        if ixcor is not None:
            _say('ixcor:', len(ixcor), ixcor)
        _say('got ixa:', len(ixa), ixa)

    if wt_train is not None:
        wtlt = l1_reader.getwt(n - lt_smp_cnt, wt_train)
        wtmt = l1_reader.getwt(n - mt_smp_cnt, wt_train)
        wtst = l1_reader.getwt(n - st_smp_cnt, wt_train)
        wtlt /= np.sum(wtlt)
        wtmt /= np.sum(wtmt)
        wtst /= np.sum(wtst)
    else:
        wtlt = None
        wtmt = None
        wtst = None
    if clf is None:
        clf = linear_model.RidgeCV()

    holdouts = [(lt_smp_cnt, wtlt), (mt_smp_cnt, wtmt), (st_smp_cnt, wtst)]

    def _score(cols, trace=None):
        # Sum of score_sign over the three hold-out lengths for X[:, cols];
        # running totals are appended to `trace` when one is supplied.
        X0 = X[:, cols]
        total = 0
        for ct, wt in holdouts:
            total += score_sign(clf,
                                X0[:-ct, :],
                                y[:-ct],
                                X0[-ct:, :],
                                y[-ct:],
                                wt=wt)
            if trace is not None:
                trace.extend([ct, total])
        return total

    tol = 1e-10
    ixi = np.array([0])
    maxit = 100
    it = 0
    scb = 0
    while len(ixa) > 0 and it < maxit:
        if vbose:
            _say('START ======= iter ', it, 'fs ', ixi, 'score ', scb,
                 ' possible ', ixa)
        sc = []
        for i0 in ixa:
            # try to find a best feature from the candidates
            trace = [i0] if vbose else None
            sc.append(_score(np.r_[ixi, i0], trace))
            if vbose:
                _say(*trace)
        # pick a best one
        ixch0 = np.argsort(sc)[-1]
        ixch = ixa[ixch0]
        if sc[ixch0] - scb > tol:
            if vbose:
                _say('adding ', ixch, ' score imp ', sc[ixch0] - scb)
            scb = sc[ixch0]

            # but would removing existing features in ixi improve?
            while len(ixi) > 1:
                sc = []
                # position 0 of ixi is the always-kept column, so start at 1
                for i0 in np.arange(len(ixi) - 1) + 1:
                    if vbose:
                        _say('try removing ')
                    sc0 = _score(np.r_[np.delete(ixi, i0), ixch])
                    if vbose:
                        _say(ixi[i0], sc0)
                    sc.append(sc0)

                ixrm0 = np.argsort(sc)[-1]
                if sc[ixrm0] < scb:
                    break
                # sc[j] belongs to ixi[j + 1]
                ixrm = ixi[ixrm0 + 1]
                if vbose:
                    _say('removing ', ixrm, ' score imp ', sc[ixrm0] - scb)
                ixi = np.delete(ixi, ixrm0 + 1)
                scb = sc[ixrm0]

            ixi = np.r_[ixi, ixch]
            ixa = np.delete(ixa, ixch0)
        else:
            ixa = []
        if vbose:
            _say('DONE ======= iter ', it, 'fs ', ixi, 'score ', scb)
        it += 1

    if len(ixi) == 1:
        return [7]
    return ixi[1:]
Пример #10
0
def test(wb5m,if_plot=True) :
    """
    Ad-hoc experiment: build features from wb5m, fit RidgeCV on several
    train/test splits and plot coefficients plus a simulated pnl curve.

    What the code does, in order:
      * Builds features fd0..fd17 as sums of wb5m[:, a:b, 1] over fixed column
        ranges; only fd0, fd1, fd2, fd3, fd4, fd11, fd14, fd15 and fd16 enter
        the feature matrix X.
      * Appends get_Xvbs(wb5m[:, :, 3]) and two lagged-target features.
      * The target y is the sum of wb5m[:, -60:-30, 1], treated with
        ean.outlier using a decayed-weight mean and standard deviation.
      * Standardises X column-wise, prints the means/stds and prepends a
        column of ones.
      * Fits linear_model.RidgeCV on four splits, prints the correlation of
        prediction and target on each held-out range and plots the
        coefficients.
      * Turns the last prediction (rows -100:) into a position size yhh,
        computes a pnl per row from wb5m[-100:, -58:-30, 1], adjusts it, and
        plots the cumulative pnl.

    `if_plot` is not read anywhere in the body; a figure is always created.
    NOTE(review): wb5m is indexed as a 3-d array (rows, columns, channel);
    the meaning of the axes and channels is not visible here - confirm.

    Returns (X, y0, yh, yhh, pnl).
    """
    #fd2=np.sum(wb5m[:,8*12+4:9*12+7,1],axis=1)
    #fd3=np.sum(wb5m[:,11*12+2:11*12+8,1],axis=1)
    fd0=np.sum(wb5m[:,6*12:8*12,1],axis=1)
    fd1=np.sum(wb5m[:,10*12+3:12*12,1],axis=1)
    fd2=np.sum(wb5m[:,18*12:19*12,1],axis=1)
    fd3=np.sum(wb5m[:,23*12-2:23*12+11,1],axis=1)

    # tuesday is tricky, changes a lot at the last year [-50:]
    fd4=np.sum(wb5m[:,(23+5)*12+5:(23+6)*12+2,1],axis=1)
    fd5=np.sum(wb5m[:,(23+10)*12+9:(23+12)*12+2,1],axis=1)
    fd6=np.sum(wb5m[:,(23+12)*12+6:(23+13)*12+2,1],axis=1)
    fd7=np.sum(wb5m[:,(23+18)*12+8:(23+19)*12+4,1],axis=1)
    fd8=np.sum(wb5m[:,(23+19)*12+4:(23+19)*12+8,1],axis=1)
    fd9=np.sum(wb5m[:,(23+21)*12+5:(23+22)*12+3,1],axis=1)

    # wednesday is also tricky, oscillates for the last 3 years
    fd10=np.sum(wb5m[:,(2*23+22)*12+3:(2*23+22)*12+11,1],axis=1)
    fd12=np.sum(wb5m[:,(2*23+18)*12+3:(2*23+19)*12+3,1],axis=1)

    fd11=np.sum(wb5m[:,(3*23+10)*12+9:(3*23+12)*12+2,1],axis=1)
    fd13=np.sum(wb5m[:,(3*23+17)*12+3:(3*23+18)*12,1],axis=1) # this feature is not important

    fd14=np.sum(wb5m[:,(4*23+0)*12-1:(4*23+0)*12+6,1],axis=1)  # has an overnight lr, note for roll
    fd15=np.sum(wb5m[:,-60-42:-60,1],axis=1) 

    ### whole days? 
    fd16=np.sum(wb5m[:,23*12*3+11:23*12*4,1],axis=1)
    fd17=np.sum(wb5m[:,23*12*1:23*12*3,1],axis=1)  # this is too volatile!

    # prev
    #fd18=np.r_[0, np.sum(wb5m[:-1,-60:-30,1],axis=1)] 
    #fd19=np.r_[0, fd18[:-1]]
    
    # NOTE: fd19..fd22 are reassigned from the outlier-treated target further
    # down before their first use, so these four values are never read.
    fd19=np.r_[np.zeros(2), np.sum(wb5m[:-2,-60:-30,1],axis=1)] 
    fd20=np.r_[np.zeros(18), np.sum(wb5m[:-18,-60:-30,1],axis=1)] # good negative corr at 18th week
    fd21=np.r_[np.zeros(32), np.sum(wb5m[:-32,-60:-30,1],axis=1)] # good positive corr at 32th week
    fd22=np.r_[np.zeros(35), np.sum(wb5m[:-35,-60:-30,1],axis=1)] # good positive corr at 35th week

    ## short range
    # yy0 is used below when sizing the position; yy1 and yy0_ are only
    # referenced from commented-out code.
    yy0=np.sum(wb5m[:,-60:-58,1],axis=1)
    yy1=np.sum(wb5m[:,-60:-58,3],axis=1)
    yy0_=np.sign(yy0)

    #X=np.vstack((fd0,fd1,fd2,fd3,fd4,fd5,fd6,fd7,fd8,fd9,fd10,fd11,fd12,fd13,fd14,fd15)).T

    # consider adding fd13 in, but it has a flip in 1 of the previous year
    #X=np.vstack((fd0,fd1,fd3,fd11,fd14)).T  # it seems that Monday,Thursday and Friday has the best
    #X=np.vstack((fd0,fd1,fd2,fd3,fd4,fd11,fd14,fd15,fd16).T  # it seems that Monday,Thursday and Friday has the best
    X=np.vstack((fd0,fd1,fd2,fd3,fd4,fd11,fd14,fd15,fd16)).T  # it seems that Monday,Thursday and Friday has the best
    # get_Xvbs may return one column (1-d) or several (2-d); append either way
    Xvbs=get_Xvbs(wb5m[:,:,3])
    if len(Xvbs.shape)== 1:
        X=np.vstack((X.T,Xvbs)).T
    else :
        X=np.hstack((X,Xvbs))

    """
    # this vol0 doesn't seem to help
    Xv0=get_vol0(wb5m[:,:,4])
    if len(Xv0.shape)== 1:
        X=np.vstack((X.T,Xv0)).T
    else :
        X=np.hstack((X,Xv0))
    """

    #X=np.vstack((X.T,fd19,fd22)).T
    #X-=np.mean(X,axis=0) ; X/=np.std(X,axis=0); X=np.vstack((np.ones(len(fd0)),X.T)).T

    # target: decayed-weight mean/std feed the outlier treatment
    y=np.sum(wb5m[:,-60:-30,1],axis=1)
    wt=l1_reader.getwt(len(y),0.1)
    wt/=np.sum(wt)
    mu=np.dot(y,wt)
    sd=np.sqrt(np.dot(wt, (y-mu)**2))
    y0=ean.outlier(y,mu,sd,in_th=1,out_th=3)

    # lagged copies of the treated target, zero-padded at the start
    fd19=np.r_[np.zeros(2), y0[:-2]] 
    fd20=np.r_[np.zeros(18), y0[:-18]] # good negative corr at 18th week
    fd21=np.r_[np.zeros(32), y0[:-32]] # good positive corr at 32th week
    fd22=np.r_[np.zeros(35), y0[:-35]] # good positive corr at 35th week
    #X=np.vstack((X.T,fd19,np.sign(fd22)*np.abs(fd21))).T
    Xprv=np.vstack((fd19,np.sign(fd22)*np.abs(fd21))).T
    X=np.vstack((X.T,Xprv.T)).T
    xmu=np.mean(X,axis=0)
    xsd=np.std(X,axis=0)

    #X-=np.mean(X,axis=0) ; X/=np.std(X,axis=0); X=np.vstack((np.ones(len(fd0)),X.T)).T
    # standardise each column, then prepend a column of ones
    X-=xmu ; X/=xsd ; 
    print xmu, xsd
    X=np.vstack((np.ones(wb5m.shape[0]),X.T)).T
    #return X,xmu,xsd

    fig=pl.figure()
    ax=fig.add_subplot(2,1,1)
    axp=fig.add_subplot(2,1,2)

    mu=0 ; sd=1  # don't scale it
    #omp=linear_model.SGDRegressor(loss='huber',penalty='l1',alpha=1)
    #y0=np.sign(y0)

    #omp=linear_model.Lars()
    #omp=linear_model.OrthogonalMatchingPursuitCV()
    #omp=linear_model.LassoCV()
    # split 1: train on all but the last 50 rows, test on the last 50
    omp=linear_model.RidgeCV(); 
    omp.fit(X[:-50,:],y0[:-50])
    yh=omp.predict(X[-50:,:]) * sd + mu
    if ax is not None:
        ax.plot(omp.coef_, '.-', label='-50')
    
    print np.corrcoef(yh, y0[-50:])[0,1]

    # split 2: train on all but the last 100 rows, test on rows -100:-50
    omp=linear_model.RidgeCV(); omp.fit(X[:-100,:],y0[:-100])
    yh=omp.predict(X[-100:-50,:]) * sd + mu
    print np.corrcoef(yh, y0[-100:-50])[0,1]
    if ax is not None:
        ax.plot(omp.coef_, '.-', label='-100:-50')

    # split 3: train on all but the last 200 rows, test on rows -200:-100
    omp=linear_model.RidgeCV(); omp.fit(X[:-200,:],y0[:-200])
    yh=omp.predict(X[-200:-100,:]) * sd + mu
    print np.corrcoef(yh, y0[-200:-100])[0,1]
    if ax is not None:
        ax.plot(omp.coef_, '.-', label='-200:-100')

    # split 4: train on all but the last 100 rows, test on the last 100;
    # this yh is the one used for the pnl simulation below
    omp=linear_model.RidgeCV(); omp.fit(X[:-100,:],y0[:-100])
    yh=omp.predict(X[-100:,:]) * sd + mu
    print np.corrcoef(yh, y0[-100:])[0,1]
    if ax is not None:
        ax.plot(omp.coef_, '.-', label='-100')

    # look into the execution of the last 100 weeks
    yhh=yh.copy()
    ysd=yhh.std()
    yhh0=yh*yy0[-100:]
    ix1=np.nonzero(yhh0>0)[0]  # correct sign at first 10 minutes
    # where prediction and yy0 agree in sign, push the prediction further
    yhh[ix1]+= yy0[-100:][ix1]/yy0.std()*ysd*2

    # add the percentage of the high positive ones
    #th0=np.percentile(np.abs(yhh0[ix1]), 50)
    #ix00=np.nonzero(np.abs(yhh0[ix1])>th0)[0]
    #yhh[ix1[ix00]]+=yy0[-100:][ix1[ix00]]/yy0.std()*ysd*1

    #ix2=np.nonzero(yh*yy1[-100:]>0)[0]
    #yhh[ix2]+= np.sign(yy1[-100:][ix2])*ysd*.1
    # compress magnitudes (power 0.01) and scale so the largest size is 1
    yhh=np.sign(yhh)*(np.abs(yhh)**0.01)
    yhh/=np.max(np.abs(yhh))

    # yhh is the normalized size to trade
    # try sign
    #yhh=np.sign(yhh)

    
    # cumulative sum along columns -58:-30 for the last 100 rows
    ylr=np.cumsum(wb5m[-100:,-58:-30,1],axis=1)
    yraw=ylr[:,-1]

    # per-row pnl path: each row of ylr scaled by that row's size
    pnl=(ylr.T*yhh).T
    #pnl=yraw*yhh
    
    # stop loss doesn't work
    th=0.0125
    #th=0.02
    tick=0.0001
    # For each row: find the first point where the pnl path exceeds th1, then
    # the first later point where it is back at or below 1.2*th1, and use the
    # pnl at that point as the row's final pnl.
    # NOTE(review): the active condition triggers on p0 > th1 (the loss-side
    # test p0 < -th1 is commented out) - confirm this is intended.
    for i,(p0,yhh0) in enumerate(zip(pnl,np.abs(yhh))) :
        th1=th*yhh0
        #th1=th
        #ix=np.nonzero(p0<-th1)[0]
        ix=np.nonzero(p0>th1)[0]
        if len(ix)>0 :
            eix=np.nonzero(p0[ix[0]:] <= th1*1.2)[0]
            if len(eix) > 0 :
                eix=ix[0]+eix[0]
                #th0=p0[eix]-tick*yhh0
                th0=p0[eix]
                #print 'stop loss at ',ix[0],eix,p0[ix[0]],p0[eix], 'final pnl is ',pnl[i,-1],th0,'saved ', th0-pnl[i,-1]
                pnl[i,-1]=th0

    ltx=-1
    pnl0=pnl[:,ltx].copy()
    #pnl0=pnl[:,-10].copy()

    # keep only the final pnl of each row; split rows by sign of that pnl
    pnl=pnl[:,-1]
    ix=np.nonzero(pnl0<0)[0]
    ixp=np.nonzero(pnl0>0)[0]
    #for a0,a1,a2 in zip(yhh[ix],pnl[ix],pnl0[ix]) :
    #    print a0, a1, a2
    # let negative ones run longer

    ltx2=-1
    #print ltx2, ltx, np.sum(wb5m[-100:,-30+ltx+1:ltx2,1],axis=1)[ix]*yhh[ix]
    # losing rows: subtract size times the sum over columns -20:-1
    pnl0[ix]-=(np.sum(wb5m[-100:,-30+ltx+1+10:ltx2,1],axis=1)[ix]*yhh[ix])
    #pnl0[ix]+=(np.sum(wb5m[-100:,-30+ltx+1:-30+ltx+1+10,1],axis=1)[ix]*yhh[ix])


    # winning rows: add size times the sum over columns -12:-1 and subtract
    # size times the sum over columns -30:-24
    pnl0[ixp]+=(np.sum(wb5m[-100:,-30+ltx+1+18:ltx2,1],axis=1)[ixp]*yhh[ixp])
    pnl0[ixp]-=(np.sum(wb5m[-100:,-30+ltx+1:-30+ltx+1+6,1],axis=1)[ixp]*yhh[ixp])
    pnl[ixp]=pnl0[ixp].copy()

    pnl[ix]=pnl0[ix].copy()

    # label shows total pnl and mean/std of the per-row pnl
    axp.plot(np.cumsum(pnl), '.-',label='pnl=%.2f,shp=%.2f'%(np.sum(pnl), pnl.mean()/pnl.std()))

    ax.legend()
    axp.legend()
    return X,y0,yh,yhh,pnl