Example #1
def __process_df(df):
    # Keep only buy ('买盘') and sell ('卖盘') ticks and drop duplicate rows.
    df1 = df.drop_duplicates()
    df1 = df1[(df1['status'] == '买盘') | (df1['status'] == '卖盘')]
    df2 = changeStockIndex(df1, 'stock_index')
    df2 = df2.sort_values('stock_index')
    # Map the status to a sign: +1 for buys, -1 for sells. Use .map instead of
    # chained indexing, which raises SettingWithCopyWarning and may not write back.
    df2['trade_stat'] = df2['status'].map({'买盘': 1, '卖盘': -1})
    # Signed trade volume: positive for buys, negative for sells.
    df2['trade_num_adjust'] = df2['trade_num'] * df2['trade_stat']
    return df2
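The helper changeStockIndex is imported from functions.DF_process but never shown on this page. From how it is called (a 'code' or 'stock_index' column goes in, and a zero-padded string 'stock_index' column comes out, later compared against prefixes like '60' and '00'), a minimal sketch could look like the following; the exact column handling is an assumption for illustration, not the project's actual implementation.

import pandas as pd

def changeStockIndex(df, col_name):
    # Hypothetical sketch: normalize the stock code column into a six-character,
    # zero-padded string column named 'stock_index'.
    df = df.copy()
    df['stock_index'] = df[col_name].astype(str).str.zfill(6)
    return df

# e.g. changeStockIndex(pd.DataFrame({'code': [600000, 1]}), 'code')['stock_index']
# -> '600000', '000001'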
Example #2
def rawDataStkList():
    '''
    Return the list of stock indexes to loop over when
    downloading the raw data in parts.
    '''
    data_dir = data_dict.get("basic_info")
    df1 = pd.read_csv(os.path.join(data_dir, "stock_basic_info.csv"))
    df1 = changeStockIndex(df1, 'code')
    stk_diaoyan_list = df1['stock_index'].tolist()
    #stk_diaoyan_list.sort()
    # Keep only main-board codes: '60' (Shanghai) and '00' (Shenzhen) prefixes.
    stk_diaoyan_list = [
        x for x in stk_diaoyan_list if x.startswith(('60', '00'))
    ]
    return stk_diaoyan_list
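A quick way to exercise the function, assuming data_dict["basic_info"] points at a directory holding stock_basic_info.csv with a 'code' column:

# Hypothetical usage: inspect the first few filtered codes.
stk_list = rawDataStkList()
print(len(stk_list), stk_list[:5])   # six-digit strings such as '600000', '000001'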
Example #3
# 'spark' is assumed to be an existing SparkSession; rand() comes from pyspark.
from pyspark.sql.functions import rand

sql1 = """
select * from stock_test.dadan_200
where day >= '2020-01-01' and day <= '2020-04-01'
"""

df1 = spark.sql(sql1)
# show 10 random rows as a quick sanity check
df1.orderBy(rand()).limit(10).show()
#df1.sample(False, 0.5, seed=0).limit(50).show()
df2 = df1.toPandas()

df2['stock_date'] = df2['day']
# Per trading day: run the DaDanAna statistics and collect the output frames.
aa = df2.groupby('stock_date').apply(
    lambda x: DaDanAna.DaDanAna(x).max_min().dadan_diff_stat().df_out
).reset_index()
aa2 = aa[['stock_date', 'stock_index', 'buy_sale_diff']]
from functions.DF_process import changeStockIndex
aa3 = changeStockIndex(aa, 'stock_index')
from functions.stock_feature.mergeData import mergeData
# loadRollingReg() is an instance method, so call it on a mergeData() instance.
df_merge = mergeData().loadRollingReg().mergeWithRollingReg(aa3)

df2 = pd.read_csv(
    "/home/davidyu/stock/data/tmp_data/stock_feature/rolling_regression/rolling_all.csv"
)

df_merge = pd.merge(df2, aa3, on=["stock_date", "stock_index"])
df_merge.to_csv('test.csv', index=False)

aa2['stock_index'] = aa['stock_index'].astype(str).str.zfill(6)
DaDanAna.DaDanAna(df2.head(10))
df_merge1 = DADAN_diff_stat(df2)
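DaDanAna and DADAN_diff_stat are project-internal and not shown on this page. Judging from the signed trade_num_adjust column built in Example #1 and the buy_sale_diff column selected above, the per-day, per-stock statistic could plausibly be approximated with plain pandas as below; the grouping keys and the aggregation are assumptions for illustration, not the project's actual code.

import pandas as pd

def buy_sale_diff_sketch(df):
    # Hypothetical re-implementation: sum the signed volumes per day and stock,
    # giving (buy volume - sell volume) as 'buy_sale_diff'.
    return (df.groupby(['stock_date', 'stock_index'])['trade_num_adjust']
              .sum()
              .reset_index(name='buy_sale_diff'))

demo = pd.DataFrame({'stock_date': ['2020-01-02'] * 2,
                     'stock_index': ['600000'] * 2,
                     'trade_num_adjust': [300, -100]})
print(buy_sale_diff_sketch(demo))   # buy_sale_diff == 200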
from davidyu_cfg import *
import pandas as pd
from functions.DF_process import changeStockIndex
from functions.day_history.getDataFromSpark import *

df1 = pd.read_csv("/home/davidyu/stock/data/basic_info/stock_basic_info.csv")
df1 = changeStockIndex(df1, 'code')
stk_diaoyan_list = df1['stock_index'].tolist()
stk_diaoyan_list = [
    x for x in stk_diaoyan_list if x.startswith(('60', '00'))
]
stk_diaoyan_tup = tuple(stk_diaoyan_list[0:20])

para = {
    'stock_tuple': stk_diaoyan_tup,
    'start_date': '',
    'end_date': '',
    'save_file_name': 'history_part1.csv'
}
getSparkData = getDataFromSpark(para)
getSparkData.getDataFromSparkAll()
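getDataFromSpark and getDataFromSparkAll are project-internal, so their behavior is not visible here. A common pattern for a para dict like this is to splice the stock tuple and date range into a Spark SQL query and save the result; a minimal sketch of that idea follows, with the interpolation pattern, the stock_index column, and the concrete values assumed purely for illustration (the table name and dates are reused from the SQL at the top of this example).

# Hypothetical sketch of the query a para dict like this could drive.
para = {
    'stock_tuple': ('600000', '000001'),
    'start_date': '2020-01-01',
    'end_date': '2020-04-01',
    'save_file_name': 'history_part1.csv',
}
sql = (
    "select * from stock_test.dadan_200 "
    "where day >= '{start_date}' and day <= '{end_date}' "
    "and stock_index in {stock_tuple}"
).format(**para)
print(sql)
# spark.sql(sql).toPandas().to_csv(para['save_file_name'], index=False)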