示例#1
0
 def getRollReg(file1, window=None, save_name=None):
     '''
     Run a rolling regression on ``adj_close`` for every stock in a CSV file
     and write the combined result to a single CSV.

     For each distinct ``stock_index`` in ``file1`` the ``stock_date`` and
     ``adj_close`` columns are passed to ``rolling_regression``. Only rows
     whose ``slope_num_in`` equals ``window`` are kept (the original note on
     this function says rows that cannot fill a whole window are filtered
     out).

     Args:
         file1: Path of the input CSV; it must contain ``stock_index``,
             ``stock_date`` and ``adj_close`` columns.
         window: Window passed to ``rolling_regression``; defaults to 5.
         save_name: Output file name inside
             ``<stock_feature dir>/rollRegression``; defaults to ``test.csv``.

     Returns:
         The full path of the CSV file that was written.
     '''
     logging.info(
         "----------------- start to calculate rolling regression-------------"
     )
     if save_name is None:
         save_name = 'test.csv'
     if window is None:
         window = 5
     df1 = pd.read_csv(file1)
     stk_list = df1['stock_index'].unique().tolist()
     data_dir = tmp_data_dict.get('stock_feature')
     save_dir = os.path.join(data_dir, "rollRegression")
     # The output path does not depend on the stock, so compute it once here;
     # this also keeps it defined when the input contains no stocks at all.
     save_file = os.path.join(save_dir, save_name)
     frames = []
     for i in stk_list:
         print(i)
         df2 = df1[df1['stock_index'] == i]
         df2 = df2[['stock_date', "adj_close"]]
         df3 = rolling_regression(df2, window, "stock_date", "adj_close")
         # Copy so the column assignment below writes to an independent frame
         # rather than to a filtered view.
         df3 = df3[df3['slope_num_in'] == window].copy()
         df3['stock_index'] = str(i).zfill(6)
         frames.append(df3)
     # DataFrame.append no longer exists in pandas 2.x; collect the pieces and
     # concatenate once instead.
     df_reg = pd.concat(frames) if frames else pd.DataFrame()
     df_reg = changeStockIndex(df_reg, 'stock_index')
     df_reg.to_csv(save_file, index=0)
     logging.info("-------------------save data {}".format(save_dir))
     return save_file
示例#2
0
def makeModelData(data_dir,columns_list):
    """Assemble the modelling table: selected feature columns plus ``slopes``.

    The combined data under ``data_dir`` is loaded, cleaned with
    ``cleanData`` and stripped of rows containing NaN. ``rolling_regression``
    is then applied per ``stock_index`` group on the ``close`` column, using
    the module-level ``regression_window``.

    Args:
        data_dir: Location handed to ``loadCombineData``.
        columns_list: Feature columns to clean and to keep in the result.

    Returns:
        A frame with ``columns_list`` followed by ``slopes``, restricted to
        rows whose ``slope_num_in`` equals 5.
    """
    def _regress_one_stock(group):
        # One regression run per stock, ordered by stock_date.
        return rolling_regression(group, regression_window, "stock_date", "close")

    cleaned = cleanData(loadCombineData(data_dir), columns_list).dropna()
    per_stock = cleaned.groupby("stock_index").apply(_regress_one_stock)
    flat = per_stock.reset_index(drop=True)

    # NOTE(review): the literal 5 presumably has to track regression_window;
    # confirm before changing either value.
    full_windows = flat[flat["slope_num_in"] == 5]
    wanted = columns_list + ["slopes"]
    return full_windows[wanted]
示例#3
0
def process(df1,start_date,end_date,max_min_stat_window,regre_window,regre_col):
    """Build one feature table joining KDJ/MACD indicators with rolling
    max/min and rolling-regression columns.

    Args:
        df1: Raw price frame, converted with ``DF_to_StockDataFrame``.
        start_date: Lower bound passed to ``select_data``.
        end_date: Upper bound passed to ``select_data``.
        max_min_stat_window: Window passed to ``rollingFutureMaxMin``.
        regre_window: Window passed to ``rolling_regression``.
        regre_col: Column on which the rolling regression is run.

    Returns:
        ``stock_kdj(stock)`` merged on ``stock_date`` with the columns of the
        max/min + regression frame that it does not already contain.
    """
    stock = DF_to_StockDataFrame(df1)
    stock = select_data(stock,start_date,end_date)
    stock_kdj_macd = stock_kdj(stock)

    df_max_min = rollingFutureMaxMin(stock,max_min_stat_window)
    df_max_min = df_max_min.reset_index()
    df_max_min = rolling_regression(df_max_min,regre_window,'date',regre_col)
    df_max_min = df_max_min.reset_index().dropna()
    # Derive the string date from the frame's own 'date' column. Going through
    # a second reset_index() would yield a Series labelled 0..n-1, which is
    # aligned by label on assignment and therefore lands on the wrong rows
    # (or becomes NaN) once dropna() has left gaps in the index.
    df_max_min['stock_date'] = df_max_min['date'].astype(str)

    # Keep only the columns the indicator frame lacks, plus the join key.
    cols_to_use = df_max_min.columns.difference(stock_kdj_macd.columns).tolist()+['stock_date']
    df_f2 = pd.merge(stock_kdj_macd,df_max_min[cols_to_use],on="stock_date")
    return df_f2
示例#4
0
    def getRollReg(file1,window=None):
        """Run a rolling regression on ``adj_close`` per stock and write one
        CSV per stock.

        For every distinct ``stock_index`` in ``file1`` the ``stock_date`` and
        ``adj_close`` columns are passed to ``rolling_regression``; rows whose
        ``slope_num_in`` differs from ``window`` are dropped. Each result is
        saved as ``<stock>_rollReg_<window>.csv`` under
        ``<stock_feature dir>/rollRegression``.

        Args:
            file1: Path of the input CSV with ``stock_index``, ``stock_date``
                and ``adj_close`` columns.
            window: Regression window; defaults to 5 when omitted.
        """
        # Honour the caller's window instead of always overwriting it with 5.
        if window is None:
            window = 5
        df1 = pd.read_csv(file1)
        stk_list = df1['stock_index'].unique().tolist()
        data_dir = tmp_data_dict.get('stock_feature')
        save_dir = os.path.join(data_dir,"rollRegression")
        for i in stk_list:
            print(i)
            df2 = df1[df1['stock_index']==i]
            df2 = df2[['stock_date',"adj_close"]]
            df3 = rolling_regression(df2,window,"stock_date","adj_close")
            # Copy so the column assignment below targets an independent frame.
            df3 = df3[df3['slope_num_in']==window].copy()
            save_name = '_'.join([str(i),'rollReg',str(window)])+'.csv'
            save_file = os.path.join(save_dir,save_name)
            df3['stock_index'] = str(i).zfill(6)
            df3.to_csv(save_file,index=0)
示例#5
0
def dfRollReg(df_feature, window):
    """Apply ``rolling_regression`` to each stock and return one flat frame.

    Args:
        df_feature: Frame with a ``stock_index`` column to group by; each
            group is passed to ``rolling_regression`` together with the
            ``stock_date`` and ``close`` column names.
        window: Window forwarded to ``rolling_regression``.

    Returns:
        The per-stock results stacked together, with the index produced by
        groupby/apply discarded.
    """
    def _fit_group(stock_rows):
        return rolling_regression(stock_rows, window, "stock_date", "close")

    fitted = df_feature.groupby("stock_index").apply(_fit_group)
    return fitted.reset_index(drop=True)
示例#6
0
import pandas as pd
from davidyu_cfg import *
from functions.rolling_regression import *
from functions.day_history.rollReg import rollRegDayHis

# Script: rolling regression of the index close price, saved as a CSV.
data_file = "/home/davidyu/stock/data/SH_SZ_index/SH_index.csv"

# Regression settings.
window = 5
sort_col = "stock_date"
reg_col = "close"

df1 = pd.read_csv(data_file)
# Column headers are of the form "<prefix>.<name>"; keep the part after the
# first dot.
df1.columns = [name.split(".")[1] for name in df1.columns]
x = df1
df_rollreg = rolling_regression(x, window, sort_col, reg_col)

# Write the result, rounded to three decimals, without the index column.
save_dir = tmp_data_dict.get("SH_index")
out_path = os.path.join(save_dir, "sh_index_rollReg.csv")
df_rollreg.round(3).to_csv(out_path, index=0)
示例#7
0
# Stack every frame in `frames` (defined outside this excerpt) into a single
# table.
df_all = pd.concat(frames)


def data_process(df2, columns_list):
    """Strip padding from the given columns and turn "----" cells into NaN.

    Every string in the listed columns has each run of four spaces removed;
    non-string cells (for example NaN) are passed through unchanged. After
    that, any cell in the whole frame that equals "----" is replaced by NaN.
    The input frame is left untouched.

    Args:
        df2: Frame to clean.
        columns_list: Names of the columns whose strings are stripped.

    Returns:
        A cleaned copy of ``df2``.
    """
    # Work on a copy so the caller's frame is never partially modified.
    cleaned = df2.copy()
    for col in columns_list:
        cleaned[col] = [
            x.replace("    ", "") if isinstance(x, str) else x
            for x in cleaned[col].tolist()
        ]
    # One whole-frame pass after all columns are stripped; doing this inside
    # the loop put NaN into columns that had not been stripped yet.
    return cleaned.replace("----", np.nan)


# Indicator columns that are cleaned here and kept as model features below.
columns_list = ["kdj_j", "kdj_k", "kdj_d", "macd_dif", "macd", "macd_dif_macd"]
# Clean the listed columns, then drop every row that still contains a NaN.
df3 = data_process(df_all, columns_list).dropna()

window = 5
# Run rolling_regression once per stock_index group on the "close" column,
# ordered by "stock_date".
df_roll_reg = df3.groupby("stock_index").apply(
    lambda x: rolling_regression(x, window, "stock_date", "close"))

# Discard the index produced by groupby/apply.
df2 = df_roll_reg.reset_index(drop=True)
# NOTE(review): the literal 5 equals `window` above; presumably the two are
# meant to stay in sync - confirm before changing either.
df2 = df2[df2["slope_num_in"] == 5]
# Overwrite "slopes" with the column returned by mergeData.regPN (helper not
# visible in this excerpt).
df2["slopes"] = mergeData.regPN(df2, 'slopes')["slopes"]
# From here on df3 is rebound to the final feature table: the six indicator
# columns plus "slopes".
df3 = df2[[
    "kdj_k", "kdj_d", "kdj_j", "macd_dif", "macd", "macd_dif_macd", "slopes"
]]

# NOTE(review): tmp_path is assigned but the join below reads tmp_data_path,
# which is not defined in this excerpt - verify which name is intended.
tmp_path = raw_data_dir
save_file = "test.csv"
save_file_name = os.path.join(tmp_data_path, save_file)
# Write the feature table without the index column.
df3.to_csv(save_file_name, index=0)

# NOTE(review): both statements start from df1 (not defined in this excerpt),
# so the second assignment discards the result of the first.
df2 = df1.replace("    ----", -999)
df2 = df1.replace("    ", "")
示例#8
0
# Column headers are of the form "<prefix>.<name>"; keep the part after the
# first dot.
df1.columns = [x.split(".")[1] for x in df1.columns.tolist()]
# Price adjustment is delegated to the project helper adjustStockPrice.
df1 = adjustStockPrice.adj_stock_price(df1)


stock = DF_to_StockDataFrame(df1)

# Columns requested from stock_feature below.
feature_list = ['kdjk','kdjd','kdjj','macdh',"rsi_6","close"]


# Bare expression: the value is discarded.
# NOTE(review): presumably this access makes `stock` compute the rsi_6
# column - confirm against DF_to_StockDataFrame's return type.
stock["rsi_6"]

window = 3

df_stock = stock_feature(stock,feature_list)
# Rolling regression on "close", ordered by "stock_date".
df3 = rolling_regression(df_stock,window,"stock_date","close")

# Columns that the regression added on top of df_stock.
cols_to_use = df3.columns.difference(df_stock.columns)
# ("stock_date") is a parenthesised string, not a tuple: the join key is the
# single column "stock_date".
df_merge = pd.merge(df_stock,df3[["stock_date"]+cols_to_use.tolist()],on=("stock_date"))

# Bare expression (interactive inspection): rows with rsi_6 above 95; the
# result is discarded.
df_merge[df_merge["rsi_6"]>95]


# df_stock is rebound to the output of stock_kdj.
df_stock = stock_kdj(stock)



def cut_list_pos_neg(seq):
	cut = 0
	seq_list = []
    try:
示例#9
0
def process():
    """Exploratory KDJ buy/sell study that ends by building a merged feature
    table.

    Takes no arguments: ``df1``, ``max_min_stat_window``, ``regre_window``
    and ``regre_col`` are read from the enclosing scope. Several statements
    are bare expressions (ratios, means, error metrics) whose values are
    discarded; they are kept so the sequence of operations is unchanged.

    Returns:
        ``stock_kdj(stock)`` merged on ``stock_date`` with the columns of the
        rolling max/min + regression frame that it does not already contain.
    """
    stock = DF_to_StockDataFrame(df1)
    stock_kdj_macd = stock_kdj(stock)
    kdj_thre = 0
    sale_days_threshold = 5
    # Percentage above the previous day's low at which to buy the next day.
    # NOTE(review): the value is divided by 100 again where it is used below -
    # confirm the intended unit.
    buy_increase_ratio = 0.01

    # Next row's low, aligned to the current row.
    stock['next_low_1'] = stock['low'].shift(-1).tolist()
    stock,sale_columns = nextPrice(stock,sale_days_threshold)
    stock['stock_date'] = df1['stock_date'].astype(str).tolist()

    # --- Study 1: rows whose kdjj is below the threshold -------------------
    stock1 = stock[stock['kdjj']<kdj_thre]
    stock1['buy_price_now'] = stock1['low']*(1+ buy_increase_ratio/100)
    stock1['if_can_buy'] = stock1['buy_price_now'] - stock1['next_low_1']
    # Share of candidate rows whose bid is above the next low (discarded).
    stock1[stock1['if_can_buy']>0].shape[0]/stock1.shape[0]

    stock1['buy_price'] = stock1['buy_price_now']

    stock1['positive_price'] = stock1['buy_price'] * 1.005
    stock1['future_max'] = stock1[sale_columns].max(axis=1)

    stock1['max_can_sale'] =  stock1['future_max']-stock1['positive_price']
    # Share of candidate rows whose future maximum exceeds the target price
    # (discarded).
    stock1[stock1['max_can_sale']>0].shape[0]/stock1.shape[0]

    stock['buy'] = stock['kdjj']

    stock1 = stock[stock['kdjj']<kdj_thre]

    # --- Study 2: next low relative to close/low when kdjj < 20 ------------
    # 'next_low' is presumably added by nextPrice above - verify.
    stock1 = stock[stock['kdjj']<20]
    a1=stock1['next_low']/stock1['close']
    a1.mean()
    a1=stock1['next_low']/stock1['low']
    a1.mean()

    # --- Study 3: linear fit of next_low on the current OHLC values --------
    from sklearn import linear_model
    from sklearn.metrics import explained_variance_score,\
            mean_absolute_error,\
            mean_squared_error,\
            median_absolute_error,r2_score
    # The `normalize` keyword was removed in scikit-learn 1.2; its old default
    # was False, so omitting it keeps the same model on every version.
    reg = linear_model.LinearRegression(fit_intercept=True)
    stock1 = stock[['high','low','open','close','next_low']].dropna()
    x = stock1[['high','low','open','close']].values
    y = stock1.next_low.values
    reg.fit(x,y)

    mean_squared_error(y,reg.predict(x))
    r2_score(y,reg.predict(x))

    cols_to_use = stock_kdj_macd.columns.difference(stock.columns).tolist()+['stock_date']
    df_f2 = pd.merge(stock,stock_kdj_macd[cols_to_use],on="stock_date")

    stock_kdj_macd = stock_kdj(stock)
    pd.merge(stock_kdj_macd,stock,on="stock_date")

    # --- Final table: rolling max/min + rolling regression -----------------
    df_max_min = rollingFutureMaxMin(stock,max_min_stat_window)
    df_max_min = df_max_min.reset_index()
    df_max_min = rolling_regression(df_max_min,regre_window,'date',regre_col)
    df_max_min = df_max_min.reset_index().dropna()
    # Derive the string date from the frame's own 'date' column. Going through
    # a second reset_index() would yield a Series labelled 0..n-1, which is
    # aligned by label on assignment and therefore lands on the wrong rows
    # (or becomes NaN) once dropna() has left gaps in the index.
    df_max_min['stock_date'] = df_max_min['date'].astype(str)
    # Keep only the columns the indicator frame lacks, plus the join key.
    cols_to_use = df_max_min.columns.difference(stock_kdj_macd.columns).tolist()+['stock_date']
    df_f2 = pd.merge(stock_kdj_macd,df_max_min[cols_to_use],on="stock_date")
    return df_f2