Пример #1
0
dataframe_list = []
# Indicator columns passed to dfIndex, plus the closing price.
feature_list = [
    'kdjk', 'kdjd', 'kdjj',
    'macdh', "macds", "macd",
    'cr', 'cr-ma1', 'cr-ma2', 'cr-ma3',
    "rsi_6", "rsi_12", "rsi_24",
    "wr_6", "wr_10", "wr_20", "close",
]
# Window handed to dfRollReg; slope_num_in is also compared against it below.
window = 5
file_name = "/home/davidyu/stock/data/test/SH_index.csv"
df_feature = dfIndex(file_name, feature_list)
df_roll_reg = dfRollReg(df_feature, window)

# Bring over only the columns that exist in df_roll_reg but not in
# df_feature, joined on stock_date.
cols_to_use = df_roll_reg.columns.difference(df_feature.columns)
df_merge = pd.merge(df_feature,
                    df_roll_reg[["stock_date"] + cols_to_use.tolist()],
                    on="stock_date")

# merge data
# Keep rows whose slope_num_in equals the window.  The explicit copy makes
# df_merge1 an independent frame, so the column assignment below does not
# write onto a slice of df_merge (pandas' SettingWithCopyWarning case).
df_merge1 = df_merge[df_merge["slope_num_in"] == window].copy()

# Overwrite "slopes" with the "slopes" column of regPN's result.
df_merge1["slopes"] = mergeData.regPN(df_merge1, 'slopes')["slopes"]
df_merge1 = df_merge1[feature_list + ["slopes"]]

# save data ("close" is left out of the written file)
tmp_data_path = os.path.join(data_path, "test")
save_file = "SH_index_kdj_macd_rsi_test.csv"
save_file_name = os.path.join(tmp_data_path, save_file)
df_merge1.drop("close", axis=1).to_csv(save_file_name, index=False)

###################################################################
Пример #2
0
from davidyu_cfg import *
from functions.stock_feature.mergeData import mergeData
# Source CSV and the subset of indicator columns kept in the sample.
data_file = "/home/davidyu/stock/data/test/day_history_kdj_macd_rsi_test.csv"
feature_list = ['kdjk', 'kdjd', 'kdjj', 'macdh', "rsi_6"]

df1 = pd.read_csv(data_file)

# Randomly draw 40% of the rows, keeping only the features and "slopes".
sample_columns = feature_list + ["slopes"]
df_sample = df1.sample(frac=0.4)[sample_columns]

# Overwrite "slopes" with the "slopes" column of regPN's result.
df_sample["slopes"] = mergeData.regPN(df_sample, 'slopes')["slopes"]

# Write the sample rounded to three decimals, without the index column.
sample_file = "/home/davidyu/stock/data/test/day_history_kdj_macd_rsi_sample.csv"
df_sample.round(3).to_csv(sample_file, index=0)

###########################################################################
###########################################################################

# Rows where rsi_6 is above 93, and their index labels in file order.
df2 = df1[df1["rsi_6"] > 93]
a1 = df2.index.tolist()

# Collect every label whose successor in a1 is not the next consecutive
# integer, i.e. the last row of each run of adjacent matches.
# NOTE(review): the last element of a1 has no successor, so the end of the
# final run is never collected -- confirm that this is intended.
c = [here for here, after in zip(a1, a1[1:]) if after - here != 1]

df3 = df1.iloc[c, :]

# Bare expressions: the values are only visible in an interactive session.
df3.slopes.max()
df3.slopes.min()
Пример #3
0
    ## moving average data
    mvAvgFile = "mvAvg_" + raw_data_name
    mvAvgDf = pd.read_csv(os.path.join(feature_dir, "mvAvg", mvAvgFile))
    ## history price data
    historyPriceFile = "historyPrice_" + raw_data_name
    historyPriceDf = pd.read_csv(
        os.path.join(feature_dir, "historyPrice", historyPriceFile))
    ## dazongjiaoyi  || left join
    #dazongjiaoyiFile = "dazongjiaoyi_"+raw_data_name
    #dazongjiaoyiDf = pd.read_csv(os.path.join(feature_dir,"dazongjiaoyi",dazongjiaoyiFile))
    ####-----------------------------------------------------####
    ####-------------------- merge data ---------------------####
    a1 = pd.merge(rollRegDf, mvAvgDf, on=("stock_date", "stock_index"))
    df_model = pd.merge(a1, historyPriceDf)
    df_model = df_model.dropna()
    df_model = mergeData.regPN(df_model, 'slopes')
    file_name = featureName + "_" + str(roll_reg_window) + '_' + raw_data_name
    df_model.to_csv(os.path.join(save_dir, file_name), index=0)

#aa =  pd.merge(df_model,dazongjiaoyiDf,how='left',on=("stock_date","stock_index"))
#aa['dazongjiaoyi_cnt'] = aa['dazongjiaoyi_cnt'].fillna(0)
'''
#df_model = df_model[df_model['dazongjiaoyi_cnt']>0]
#df_model = df_model[df_model['stock_date']>'2013-01-01']

feature_cols = np.load("feature_columns.npy").tolist()


df_x = df_model[df_model.columns.intersection(feature_cols)]

df_y = df_model.slopes.values
Пример #4
0
    for col in columns_list:
        df2[col] = [x.replace("    ", "") for x in df2[col].tolist()]
        df2 = df2.replace("----", np.nan)
    return df2


# Columns cleaned by data_process before rows with missing values are dropped.
columns_list = ["kdj_j", "kdj_k", "kdj_d", "macd_dif", "macd", "macd_dif_macd"]
df3 = data_process(df_all, columns_list).dropna()

# Window handed to rolling_regression; slope_num_in is compared against it.
window = 5
df_roll_reg = df3.groupby("stock_index").apply(
    lambda x: rolling_regression(x, window, "stock_date", "close"))

df2 = df_roll_reg.reset_index(drop=True)
# Compare against `window` rather than a second hard-coded 5, and take an
# explicit copy so the column assignment below does not write onto a slice
# of the unfiltered frame (pandas' SettingWithCopyWarning case).
df2 = df2[df2["slope_num_in"] == window].copy()
df2["slopes"] = mergeData.regPN(df2, 'slopes')["slopes"]
# Column order of the written file is fixed here.
df3 = df2[[
    "kdj_k", "kdj_d", "kdj_j", "macd_dif", "macd", "macd_dif_macd", "slopes"
]]

# The original assigned tmp_path and then joined on tmp_data_path, a name
# this script never defines; use the directory that was actually set.
tmp_path = raw_data_dir
save_file = "test.csv"
save_file_name = os.path.join(tmp_path, save_file)
df3.to_csv(save_file_name, index=False)

# Scratch clean-up of placeholder strings in df1.
# NOTE(review): the next two statements both start from df1, so the -999
# substitution made by the first is discarded when df2 is reassigned.
df2 = df1.replace("    ----", -999)
df2 = df1.replace("    ", "")
# Keep rows whose kdj_j is greater than -900.
# NOTE(review): presumably meant to drop the -999 placeholder rows, which
# the reassignment above no longer produces -- confirm.
df3 = df2[df2["kdj_j"] > -900]

# Remove every run of four spaces from each kdj_j value, then replace cells
# equal to "----" anywhere in the frame with -999.
df2["kdj_j"] = [x.replace("    ", "") for x in df2["kdj_j"].tolist()]
df2 = df2.replace("----", -999)
Пример #5
0
    os.path.join(tmp_data_dir, "financial_report_ml_test_data.csv"))
# Replace every -9999 value in the frame with NaN.
# NOTE(review): -9999 is presumably a missing-value sentinel -- confirm.
df_merge3 = df_merge3.replace(-9999, np.nan)  ## 180


def df_corr(df=None):
    """Return every column's Pearson correlation with ``change_rate``.

    The original computed this Series and discarded it, so the function
    always returned None; the result is now returned to the caller.

    Args:
        df: DataFrame to analyse. When omitted, the module-level
            ``df_merge3`` is used, as before.

    Returns:
        A Series indexed by column name holding the correlation of each
        column with ``change_rate``, sorted in ascending order.
    """
    if df is None:
        df = df_merge3
    return df.corr(method='pearson')['change_rate'].sort_values()


## build the feature matrix and the labels used for feature selection
# Every column of df_merge3 except the last one is used as a feature.
feature_frame = df_merge3[df_merge3.columns.tolist()[:-1]]
# NaN entries are filled with the column mean.
# NOTE(review): sklearn.preprocessing.Imputer is absent from current
# scikit-learn releases (SimpleImputer replaces it) -- confirm the pinned
# version before running.
imp = Imputer(missing_values=np.nan, strategy='mean', axis=0)
X = imp.fit_transform(feature_frame.values)
# Labels are the "change_rate" column of regPN's result rather than the
# raw change_rate values.
df_y = mergeData.regPN(df_merge3, "change_rate")
y = df_y['change_rate']


## regularisation-based feature selection
def featureSelectSVC(X, y):
    """Select features by fitting an L1-penalised LinearSVC.

    Args:
        X: Feature matrix passed to ``LinearSVC.fit``. Its columns are
            labelled with every column name of the module-level
            ``df_merge3`` except the last -- assumes X was built from those
            columns in the same order; TODO confirm against the caller.
        y: Labels passed to ``LinearSVC.fit``.

    Returns:
        A tuple ``(df_fea2, uni_feature)``. ``df_fea2`` is a DataFrame with
        columns ``weight`` and ``feature`` holding only the features whose
        weight is non-zero, sorted by ascending weight. ``uni_feature`` is
        the sorted list of distinct prefixes (the text before the first
        underscore) of those feature names.
    """
    lsvc = LinearSVC(C=0.0001, penalty="l1", dual=False).fit(X, y)
    df_feature_select = pd.DataFrame(lsvc.coef_).T
    df_feature_select['feature'] = df_merge3.columns.tolist()[:-1]
    # Assigning exactly two column names only fits when coef_ has one row.
    df_feature_select.columns = ["weight", "feature"]
    df_fea1 = df_feature_select[df_feature_select["weight"] != 0]
    df_fea2 = df_fea1.sort_values("weight")
    # Sorted so the result is reproducible: list(set(...)) yields the
    # prefixes in arbitrary, hash-dependent order.
    uni_feature = sorted(
        {name.split("_")[0] for name in df_fea1['feature'].tolist()})
    return df_fea2, uni_feature