def spread_mean(stock1, stock2, i, table):
    """Return the model-implied spread path and the observed spread for pair ``i``.

    The first 150 observations of row ``i`` of ``stock1`` and ``stock2`` are
    log-transformed, a lag order is chosen with ``order_select(logy, 5)`` and
    parameters are obtained from ``para_vecm``.  Starting from the observed
    log prices at row ``p``, the log-price path is then rolled forward one
    step at a time using only the fitted parameters: each predicted change is
    written back into the difference series so later steps use predicted,
    not observed, lagged changes.

    Parameters
    ----------
    stock1, stock2 : 2-D array-like indexed as ``[i, :150]``
        Price series; values must be positive because they are logged.
    i : int
        Row of ``table`` (and of the price arrays) to process.
    table : object exposing ``model_type``, ``w1`` and ``w2`` with ``.iloc``
        ``model_type`` must be ``'model1'``, ``'model2'`` or ``'model3'``.

    Returns
    -------
    spread_m : numpy.ndarray, 1-D
        ``[w1, w2]`` applied to the simulated log-price path.
    spread : numpy.matrix
        ``[w1, w2]`` applied to the observed log prices from row ``p`` onward.

    Raises
    ------
    ValueError
        If ``table.model_type.iloc[i]`` is not one of the three known labels.
    """
    model_names = {'model1': 'H2', 'model2': 'H1*', 'model3': 'H1'}
    model_type = table.model_type.iloc[i]
    model = model_names.get(model_type)
    if model is None:
        raise ValueError(
            'unknown model_type {!r} for row {}; expected one of {}'.format(
                model_type, i, sorted(model_names)))

    stock1 = stock1[i, :150]
    stock2 = stock2[i, :150]
    b = np.asmatrix([[table.w1.iloc[i]], [table.w2.iloc[i]]])

    logy = np.log(np.vstack([stock1, stock2]).T)
    # Keep an untouched copy: the simulation below writes into views of logy.
    observed = logy.copy()

    p = order_select(logy, 5)
    _, _, para = para_vecm(logy, model, p)

    # np.asmatrix does not copy ndarrays, so y_1 aliases logy[p:].
    logy = np.asmatrix(logy)
    y_1 = np.asmatrix(logy[p:])
    dy = np.asmatrix(np.diff(logy, axis=0))

    one = np.asmatrix([1])
    long_run = para[0] * para[1].T  # loop-invariant

    for j in range(len(stock1) - p - 1):
        # 'H1*' appends a constant to the level term.
        level = y_1[j]
        if model == 'H1*':
            level = np.hstack([level, one])
        delta = long_run * level.T

        if p != 1:
            # 'H1' appends a constant to the lagged-difference term.
            lagged = dy[j:(j + p - 1)].flatten()
            if model == 'H1':
                lagged = np.hstack([lagged, one])
            delta = delta + para[2] * lagged.T
        elif model == 'H1':
            # No lagged differences: para[2] multiplies the constant only.
            delta = delta + para[2] * one

        dy[j + p, :] = delta.T
        y_1[j + 1] = y_1[j] + delta.T

    spread = b.T * observed[p:].T
    spread_m = np.array(b.T * y_1.T).flatten()
    return spread_m, spread
def get_Estd(stock1, stock2, i, table, dy=True, D=16):
    """Return the accumulated standard deviation of the weighted spread for pair ``i``.

    The first 150 observations of row ``i`` of both price arrays are logged,
    a lag order is chosen with ``order_select(logy, 5)`` and ``para_vecm``
    supplies residuals ``u`` and a coefficient matrix ``A`` whose first
    column is treated as the constant.  A one-step covariance ``var`` is
    then propagated through 149 further steps:

        Evar = var + sum_{h=1..149} C_h * var * C_h.T

    where ``C_h`` is the top-left 2x2 block of ``newA ** h``, and finally
    projected onto the weights ``b = [w1, w2]``.

    Parameters
    ----------
    stock1, stock2 : 2-D array-like indexed as ``[i, :150]``
        Price series; values must be positive because they are logged.
    i : int
        Row of ``table`` (and of the price arrays) to process.
    table : object exposing ``model_type``, ``w1`` and ``w2`` with ``.iloc``
        ``model_type`` must be ``'model1'``, ``'model2'`` or ``'model3'``.
    dy : bool, default True
        If true, ``var`` is built from the residuals ``u`` returned by
        ``para_vecm``.  Otherwise it is built from the residuals of
        regressing the log prices on their own ``p`` lags with ``A``.
    D : int, default 16
        Not used by this function; kept for call compatibility.

    Returns
    -------
    numpy.matrix
        ``sqrt(b.T * Evar * b)``.

    Raises
    ------
    ValueError
        If ``table.model_type.iloc[i]`` is not one of the three known labels.
    """
    model_names = {'model1': 'H2', 'model2': 'H1*', 'model3': 'H1'}
    model_type = table.model_type.iloc[i]
    model = model_names.get(model_type)
    if model is None:
        raise ValueError(
            'unknown model_type {!r} for row {}; expected one of {}'.format(
                model_type, i, sorted(model_names)))

    stock1 = stock1[i, :150]
    stock2 = stock2[i, :150]
    b = np.asmatrix([[table.w1.iloc[i]], [table.w2.iloc[i]]])

    logy = np.log(np.vstack([stock1, stock2]).T)
    p = order_select(logy, 5)
    u, A, _ = para_vecm(logy, model, p)

    # First column of A is the constant; the rest multiply the lagged levels.
    constant = np.asmatrix(A[:, 0])
    A = A[:, 1:]
    n_lag_cols = A.shape[1]
    # Stack A on top of [I | 0]; this is the layout of a VAR companion matrix
    # when A has two rows -- NOTE(review): confirm the shape para_vecm returns.
    extend = np.hstack(
        [np.identity(n_lag_cols - 2), np.zeros([n_lag_cols - 2, 2])])
    newA = np.vstack([A, extend])

    if dy:
        var = u * u.T / u.shape[1]
    else:
        # Regressor matrix [y_{t-1}, y_{t-2}, ..., y_{t-p}].
        lagy = logy[p - 1:-1, :]
        for lag in range(1, p):
            lagy = np.hstack([lagy, logy[p - 1 - lag:-lag - 1, :]])
        Estimate_logy = np.asmatrix(A) * np.asmatrix(lagy).T + constant
        e = logy[p:, :].T - Estimate_logy
        var = e * e.T / e.shape[1]

    NowCoef = np.asmatrix(np.eye(len(newA)))
    Evar = var.copy()
    for _ in range(149):
        NowCoef = newA * NowCoef
        Evar = Evar + NowCoef[:2, :2] * var * NowCoef[:2, :2].T

    Evar = b.T * Evar * b
    return np.sqrt(Evar)
def formation_table(Smin, inNum, costS, cost, os, cs, MaxV, OpenD, Min_cp,
                    Max_tp):
    """Build the formation-period table of cointegrated pairs.

    Every pair of columns of ``Smin`` (as enumerated by ``mt.Binal_comb``) is
    tested on the first ``inNum`` rows of log prices:

    1. ``order_select(rowAS, 5)`` picks the VAR lag order ``p`` (per the
       original comment: chosen by BIC); pairs with ``p < 1`` are skipped.
    2. A VAR(p) is fitted once; pairs whose whiteness (portmanteau) or
       normality test has p-value below 0.05 are skipped.
    3. ``jci.JCI_AutoSelection`` picks the Johansen model, and
       ``jci.JCItestpara_spilCt`` / ``jci.Johansen_mean`` /
       ``jci.Johansen_std_correct`` provide the cointegration vector and the
       spread's trend intercept, slope and variance.

    A row is written to the table only after *all* of its statistics were
    computed, so a pair that fails part-way through is dropped instead of
    being returned half-filled.  Any ``Exception`` raised for a pair skips
    that pair (best effort); ``KeyboardInterrupt`` / ``SystemExit`` propagate.

    Parameters
    ----------
    Smin : pandas.DataFrame
        Minute prices, one column per stock.  Per the original comment the
        first 16 and last 5 minutes of the day were already dropped.  Column
        labels must be convertible to float because they are stored in the
        numeric result table.
    inNum : int
        Number of leading rows forming the formation period.
    costS, cost, os, cs, MaxV, OpenD, Min_cp, Max_tp
        Not used by this function; kept so the signature matches
        ``daily_procces``.

    Returns
    -------
    numpy.ndarray, shape (n_selected, 9)
        Rows whose model column is non-zero.  Columns:
        0 stock-1 label, 1 stock-2 label, 2 ``p - 1``, 3 Johansen intercept,
        4 Johansen slope, 5 spread std, 6 selected model,
        7 capital weight 1, 8 capital weight 2.
    """
    LSmin = np.log(Smin)
    pairs = mt.Binal_comb(range(Smin.shape[1]))
    ind = np.hstack((pairs, np.zeros([pairs.shape[0], 7])))
    max_p = 5

    for mi in range(ind.shape[0]):
        # Columns 0/1 hold positional indices here and are overwritten with
        # the column labels right after the slice is taken.
        rowS = LSmin.iloc[0:inNum, [int(ind[mi, 0]), int(ind[mi, 1])]]
        ind[mi, 0:2] = rowS.columns.values
        rowAS = np.array(rowS)

        try:
            p = order_select(rowAS, max_p)
            if p < 1:
                continue

            fitted = VAR(rowAS).fit(p)
            # Portmanteau (whiteness) test on the residuals.
            if fitted.test_whiteness(nlags=5).pvalue < 0.05:
                continue
            # Normality test on the residuals.
            if fitted.test_normality().pvalue < 0.05:
                continue

            opt_model = jci.JCI_AutoSelection(rowAS, p - 1)
            F_a, F_b, F_ct, F_ut, F_gam, _ct, _omega_hat = (
                jci.JCItestpara_spilCt(rowAS, opt_model, p - 1))

            # Flatten the (2, 1) cointegration vector into a length-2 buffer;
            # the slice assignment fails (and skips the pair) on any other
            # length, exactly like the original column assignment did.
            beta = np.zeros(2)
            beta[:] = pd.DataFrame(F_b).stack()
            # Normalise to capital weights (absolute values sum to one).
            weights = beta / np.sum(np.absolute(beta))

            # Time-trend mean and standard deviation of the spread.
            intercept, slope = jci.Johansen_mean(F_a, F_b, F_gam, F_ct, p - 1)
            std = np.sqrt(
                jci.Johansen_std_correct(F_a, F_b, F_ut, F_gam, p - 1))

            # Commit the row atomically, only once everything succeeded.
            row = ind[mi].copy()
            row[2] = p - 1
            row[3] = intercept
            row[4] = slope
            row[5] = std
            row[6] = opt_model
            row[7] = weights[0]
            row[8] = weights[1]
            ind[mi] = row
        except Exception:
            continue

    # Keep only pairs for which a cointegration model was recorded.
    return ind[ind[:, 6] != 0, :]
def daily_procces(Smin, inNum, costS, cost, os, cs, MaxV, OpenD, Min_cp,
                  Max_tp):
    """Select tradable pairs for one day and run the trading routines on them.

    Phase 1 (formation, first ``inNum`` rows): for every pair of columns of
    ``Smin`` the Johansen model is selected with ``jci.JCI_AutoSelection``.
    Pairs with model 4 or 5 get capital weights from the normalised
    cointegration vector; these are converted to share-count weights using
    the prices at row ``inNum - 1`` and reduced to a small integer ratio by
    ``mt.simp_frac``.  Pairs whose integer weights both lie within
    ``+-MaxV`` are kept and their spread mean/std are computed.  Any
    ``Exception`` for a pair skips that pair (best effort).

    Phase 2 (trading, rows ``inNum:``): for every kept pair with
    ``SStd * os > costS`` the result of ``mt.trade_up`` (rising mean) or
    ``mt.trade_down`` (falling mean) is printed.  The results are not stored
    in the returned array.

    Parameters
    ----------
    Smin : pandas.DataFrame
        Minute prices for one day, one column per stock.  Column labels must
        be strings of integers: they are stored as floats and looked up
        again via ``str(int(label))``.
    inNum : int
        Number of formation-period rows; must be smaller than ``len(Smin)``.
    costS : float
        Threshold the opening band ``SStd * os`` must exceed.
    cost, os, cs, Max_tp
        Passed through to ``mt.trade_up`` / ``mt.trade_down``.
    MaxV : int
        Largest absolute integer weight allowed per leg.
    OpenD, Min_cp
        Not used by the active code; kept for call compatibility.

    Returns
    -------
    numpy.ndarray, shape (n_selected, 17)
        Columns 0-1 stock labels, 2 model, 3-4 integer weights, 5 copy of
        table column 10 (never filled, so zero).  Columns 6-16 stay zero;
        the original legend reserves them for profit figures (6-10), trade
        counts (11-15) and direction up(1)/down(-1) (16).
    """
    LSmin = np.log(Smin)
    DailyNum = len(Smin)
    pairs = mt.Binal_comb(range(Smin.shape[1]))
    # ind columns: 0 S1, 1 S2, 2 opt_q, 4 integer-weight check, 6 model,
    # 7 integer weight 1, 8 integer weight 2, 9 spread std.
    # Columns 3, 5 and 10 are reserved and never written.
    ind = np.hstack((pairs, np.zeros([pairs.shape[0], 9])))
    n_pairs = ind.shape[0]

    cy = np.zeros([DailyNum, n_pairs])        # log spread, capital-weighted
    cy_mean = np.zeros([DailyNum, n_pairs])   # trend mean of the spread
    B = np.zeros([2, n_pairs])                # cointegration coefficients
    CapitW = np.zeros([2, n_pairs])           # capital weights
    IntegerB = np.zeros([2, n_pairs])         # integer share-count weights

    max_p = 5
    # Time index for the trend; follows the actual day length rather than
    # assuming 250 rows.
    minutes = np.arange(DailyNum, dtype=float)

    for mi in range(n_pairs):
        # Read positional indices before columns 0/1 are overwritten with
        # the column labels.
        col1, col2 = int(ind[mi, 0]), int(ind[mi, 1])
        rowS = LSmin.iloc[0:inNum, [col1, col2]]
        rowLS = LSmin.iloc[:DailyNum, [col1, col2]]
        # Scalar prices at the end of the formation period.
        price1 = Smin.iloc[inNum - 1, col1]
        price2 = Smin.iloc[inNum - 1, col2]
        ind[mi, 0:2] = rowS.columns.values
        rowAS = np.array(rowS)

        try:
            p = order_select(rowAS, max_p)
            opt_model = jci.JCI_AutoSelection(rowAS, p - 1)
            ind[mi, 2] = p - 1
            ind[mi, 6] = opt_model
            if not (opt_model == 4 or opt_model == 5):
                continue

            F_a, F_b, F_ct, F_ut, F_gam, _ct, _omega_hat = (
                jci.JCItestpara_spilCt(rowAS, opt_model, p - 1))

            # Flatten the (2, 1) cointegration vector and normalise it so
            # the absolute capital weights sum to one.
            B[:, mi] = pd.DataFrame(F_b).stack()
            CapitW[:, mi] = B[:, mi] / np.sum(np.absolute(B[:, mi]))

            # Convert capital weights into share-count weights.
            S1 = CapitW[0, mi] / float(price1)
            S2 = CapitW[1, mi] / float(price2)
            # Reduce to the simplest integer ratio within MaxV + 1; a leg
            # that lands on MaxV + 1 is rejected by the check below.
            optXY = mt.simp_frac(S1, S2, MaxV + 1)
            if not (abs(optXY[0]) <= MaxV and abs(optXY[1]) <= MaxV):
                continue

            ind[mi, 4] = 1
            IntegerB[:, mi] = optXY[:]
            ind[mi, 7] = optXY[0]
            ind[mi, 8] = optXY[1]

            # Spread statistics are only needed for rows that passed both
            # filters; all other rows are discarded below.
            intercept, slope = jci.Johansen_mean(F_a, F_b, F_gam, F_ct, p - 1)
            ind[mi, 9] = np.sqrt(
                jci.Johansen_std_correct(F_a, F_b, F_ut, F_gam, p - 1))
            cy_mean[:, mi] = intercept + slope * minutes
            cy[:, mi] = pd.DataFrame(
                np.asmatrix(rowLS) * np.asmatrix(CapitW[:, mi]).T).stack()
        except Exception:
            continue

    # Keep pairs that passed the integer-weight check with model 4 or 5.
    selected = (ind[:, 4] == 1) & (ind[:, 6] >= 4)
    OMinx = ind[selected, :]
    cy = cy[:, selected]
    cy_mean = cy_mean[:, selected]
    IntegerB = IntegerB[:, selected]

    DailyResult = np.zeros((OMinx.shape[0], 17))
    DailyResult[:, 0:2] = OMinx[:, 0:2]
    DailyResult[:, 2:5] = OMinx[:, 6:9]
    DailyResult[:, 5] = OMinx[:, 10]

    for k in range(OMinx.shape[0]):
        SStd = OMinx[k, 9]
        mean_slope = cy_mean[inNum, k] - cy_mean[0, k]
        smin = Smin[[str(int(OMinx[k, 0])),
                     str(int(OMinx[k, 1]))]][inNum:DailyNum]

        # Written as "not >" so a NaN std also skips the pair.
        if not (SStd * os > costS):
            continue

        if mean_slope > 0:
            print(
                mt.trade_up(cy[inNum:DailyNum, k], cy_mean[inNum:DailyNum, k],
                            np.array(smin), IntegerB[:, k], SStd, cost, os, cs,
                            Max_tp))
        elif mean_slope < 0:
            print(
                mt.trade_down(cy[inNum:DailyNum, k],
                              cy_mean[inNum:DailyNum, k], np.array(smin),
                              IntegerB[:, k], SStd, cost, os, cs, Max_tp))

    return DailyResult
def cointegration_weight(stock1, stock2):
    """Pick a VECM specification for a pair and return its cointegration weights.

    The two series are stacked into a two-column array ``z``.  The pair is
    rejected with ``(0, 0)`` when the selected VAR lag order is below 1, when
    the fitted VAR fails the whiteness or normality test at the 5% level, or
    when ``rank`` reports no cointegration rank under ``'H1'``.

    Otherwise the specification is chosen by nested comparisons of the
    values returned by ``eig``:

    * no rank under ``'H1*'``            -> ``'H1'``
    * ``'H1*'`` vs ``'H1'`` test > 3.8414 -> ``'H1'``
    * else, no rank under ``'H2'``       -> ``'H1*'``
    * ``'H2'`` vs ``'H1*'`` test > 3.8414 -> ``'H1*'``
    * otherwise                          -> ``'H2'``

    The structural-break (Chow) check mentioned in the original comments is
    not performed by the active code.

    Parameters
    ----------
    stock1, stock2 : 1-D array-like of equal length
        The two series of the pair.

    Returns
    -------
    tuple ``(0, 0)`` if the pair is rejected, otherwise whatever
    ``weigh(frame, model, p, rank)`` returns for the chosen model.

    Raises
    ------
    ValueError
        If the ``'H1*'`` vs ``'H1'`` test statistic is not positive.
    """
    # 3.8414 is presumably the 5% chi-square(1) critical value -- confirm.
    critical_value = 3.8414

    z = np.vstack([stock1, stock2]).T
    model = VAR(z)
    p = order_select(z, 5)

    # A VAR needs at least one lag.
    if p < 1:
        return 0, 0

    fitted = model.fit(p)
    # Portmanteau (whiteness) test on the residuals.
    if fitted.test_whiteness(nlags=5).pvalue < 0.05:
        return 0, 0
    # Normality test on the residuals.
    if fitted.test_normality().pvalue < 0.05:
        return 0, 0

    r1 = rank(pd.DataFrame(z), 'H2', p)
    r2 = rank(pd.DataFrame(z), 'H1*', p)
    r3 = rank(pd.DataFrame(z), 'H1', p)

    if not (r3 > 0):
        return 0, 0

    # Rank only under model 3.
    if not (r2 > 0):
        return weigh(pd.DataFrame(z), 'H1', p, r3)

    n_eff = len(stock1) - p

    # Model 3 against model 2; both values are evaluated at rank r2.
    lambda_model2 = eig(pd.DataFrame(z), 'H1*', p, r2)
    lambda_model3 = eig(pd.DataFrame(z), 'H1', p, r2)
    test = np.log(lambda_model2 / lambda_model3) * n_eff
    if test <= 0:
        raise ValueError('test value error')
    if test > critical_value:
        return weigh(pd.DataFrame(z), 'H1', p, r3)

    # Model 3 not preferred; without rank under model 1 keep model 2.
    if not (r1 > 0):
        return weigh(pd.DataFrame(z), 'H1*', p, r2)

    # Model 2 against model 1.
    lambda_model1 = eig(pd.DataFrame(z), 'H2', p, r1)
    test = np.log(lambda_model1 / lambda_model2) * n_eff
    if test > critical_value:
        return weigh(pd.DataFrame(z), 'H1*', p, r2)
    return weigh(pd.DataFrame(z), 'H2', p, r1)