def insert_hist_trade(self):
    """Fetch the current day's daily bar for every stock and store it in ``trade_hist``.

    Calls ``self.set_data()`` first, then reads every ``code``/``name`` pair
    from the ``stock_code`` table.  For each stock it requests daily
    (``ktype='D'``) history restricted to ``self.nowdate``, stamps the rows
    with ``self.nowtime``, the stock code and name, and bulk-inserts them.
    An insert failure is logged and the loop moves on to the next stock.
    """
    self.set_data()
    db = Db()
    engine = db._get_engine()
    stock_rows = pd.read_sql_query("select code,name from stock_code",
                                   engine).to_dict('records')
    insert_sql = (
        "INSERT INTO trade_hist(c_yearmonthday,code,name,open,high,close,low,"
        "volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20,"
        "turnover,datain_date) "
        "VALUES (%(c_yearmonthday)s,%(code)s,%(name)s,%(open)s,%(high)s,"
        "%(close)s,%(low)s,%(volume)s,%(price_change)s,%(p_change)s,%(ma5)s,"
        "%(ma10)s,%(ma20)s,%(v_ma5)s,%(v_ma10)s,%(v_ma20)s,%(turnover)s,"
        "%(datain_date)s)"
    )
    for i, row in enumerate(stock_rows, 1):
        gta = td.get_hist_data(code=row['code'],
                               start=self.nowdate,
                               end=self.nowdate,
                               ktype='D',
                               retry_count=3,
                               pause=0.001)
        if gta is None:
            # NOTE(review): tushare's get_hist_data returns None when there is
            # no data for the range; presumably `td` is tushare -- confirm.
            log.info('no hist data,code is :%s', row['code'])
            continue
        gta['datain_date'] = self.nowtime
        gta['code'] = row['code']
        gta['name'] = row['name']
        # The frame is indexed by trade date; expose it as a regular column.
        gta['c_yearmonthday'] = gta.index
        records = gta.to_dict('records')
        try:
            db.insertmany(insert_sql, records)
        except Exception as e:  # best effort: one failed stock must not stop the run
            log.error('insert error:%s ', e)
        # Only byte strings need decoding; calling .decode() on text that is
        # already unicode would fail for non-ASCII names.
        name = row['name']
        if isinstance(name, bytes):
            name = name.decode('utf-8')
        log.info('%s stock insert finished,%s,%s', i, row['code'], name)
def get_predict_acc1(debug=False):
    """Join the leading trade-history rows with the leading prediction rows.

    Takes the first two rows of each query (both ordered by
    ``c_yearmonthday`` descending), inner-joins them on ``code`` and adds an
    ``acc`` column that is 1.0 where ``p_change`` is positive and 0.0
    otherwise.

    :param debug: accepted for signature compatibility; has no effect here.
    :return: the merged DataFrame including the ``acc`` column.
    """
    db = Db()
    engine = db._get_engine()
    trade_query = ("select code,name,p_change from trade_hist where code in "
                   "(select code from predict_head where c_yearmonthday in "
                   "(select max(c_yearmonthday) from predict_head) ) "
                   "order by c_yearmonthday desc")
    predict_query = ("select code,predict from predict_head "
                     "order by c_yearmonthday desc")
    trades = pd.read_sql_query(trade_query, engine).head(2)
    predictions = pd.read_sql_query(predict_query, engine).head(2)
    merged = pd.merge(trades, predictions, on='code')
    rose = merged['p_change'] > 0
    merged['acc'] = rose.astype(float)
    return merged
def get_hist_orgindata(debug=False):
    """Load ``trade_hist`` rows and enrich them with derived indicator columns.

    Only stocks with more than 100 rows whose ``high`` and ``low`` are
    non-zero are selected.  The frame is then passed through
    ``add_volatility`` and ``get_technique``.

    :param debug: when true, restrict the query to three fixed stock codes.
    :return: ``(df, codes)`` -- the enriched DataFrame and the unique stock
        codes found in the raw query result.
    """
    db = Db()
    engine = db._get_engine()
    query = ("select * from trade_hist where code in "
             "(select code from trade_hist where high<>0.0 and low <>0.0 "
             "group by code having count(code)>100)")
    if debug:
        query = query + " and code in ('002717','601888','002405')"
    raw = pd.read_sql_query(query, engine)
    # Collect the codes before the helpers get a chance to reshape the frame.
    codes = raw['code'].unique()
    # Add technical indicators.
    enriched = get_technique(add_volatility(raw))
    return enriched, codes
def get_predict(debug=False):
    """Summarise the most recent batch of rows in the ``predicts`` table.

    Reads the rows whose ``datain_date`` equals the table's maximum, ordered
    by ``predict`` descending.

    :param debug: accepted for signature compatibility; has no effect here.
    :return: ``(psummery, headpredict)`` -- the transposed ``describe()``
        statistics with columns renamed to ``p_cnt`` ... ``p_max``, and the
        first two rows of the query result.
    """
    db = Db()
    engine = db._get_engine()
    query = ("select * from predicts where datain_date in"
             "(select max(datain_date) from predicts) "
             "order by predict desc")
    latest = pd.read_sql_query(query, engine)
    top_two = latest.head(2)
    summary = latest.describe().T
    # Assumes describe() produced its eight numeric statistics, in this order.
    summary.columns = [
        'p_cnt', 'p_mean', 'p_std', 'p_min', 'p25', 'p50', 'p75', 'p_max'
    ]
    return summary, top_two
def get_predict(debug=False):
    """Summarise the most recent batch of rows in the ``predicts`` table.

    Reads the rows whose ``datain_date`` equals the table's maximum, ordered
    by ``predict`` descending.

    :param debug: accepted for signature compatibility; has no effect here.
    :return: ``(psummery, headpredict)`` -- the transposed ``describe()``
        statistics with columns renamed to ``p_cnt`` ... ``p_max``, and the
        first two rows of the query result.
    """
    db = Db()
    engine = db._get_engine()
    query = ("select * from predicts where datain_date in"
             "(select max(datain_date) from predicts) "
             "order by predict desc")
    latest = pd.read_sql_query(query, engine)
    top_two = latest.head(2)
    summary = latest.describe().T
    # Assumes describe() produced its eight numeric statistics, in this order.
    summary.columns = [
        'p_cnt', 'p_mean', 'p_std', 'p_min', 'p25', 'p50', 'p75', 'p_max'
    ]
    return summary, top_two
def get_predict_acc2(debug=False):
    """Build a one-row accuracy summary from the ``acc1`` table.

    Columns of the result:

    * ``h_p_acc``    -- sum of ``acc`` over all rows divided by the number of
      non-null ``acc`` values.
    * ``h_p_change`` -- sum of ``p_change`` over all rows divided by 2.
    * ``p_acc`` / ``p_change`` -- the same two sums over only the two rows
      with the greatest ``c_yearmonthday``, each divided by 2.

    :param debug: accepted for signature compatibility; has no effect here.
    :return: a single-row DataFrame with the four columns above.
    """
    db = Db()
    engine = db._get_engine()
    history = pd.read_sql_query("select * from acc1", engine)

    # Two most recent rows, collapsed to one summed row per day.
    newest = history.sort_values('c_yearmonthday', ascending=False).head(2)
    newest_by_day = newest.groupby('c_yearmonthday').sum()

    summary = pd.DataFrame()
    acc_column = history['acc']
    summary['h_p_acc'] = [acc_column.sum() / float(acc_column.count())]
    summary['h_p_change'] = [history['p_change'].sum() / 2.0]
    summary['p_acc'] = [newest_by_day['acc'].sum() / 2.0]
    summary['p_change'] = [newest_by_day['p_change'].sum() / 2.0]
    return summary
def get_histdata(split=0.15, seg_len=3, debug=False, datatype='cnn'):
    """Build windowed train/validation samples from ``trade_record`` and pickle them.

    For every stock the trade days are sorted in descending order (newest
    first, assuming ``c_yearmonthday`` sorts chronologically).  The day at
    1-based position ``i`` gets as features the rows of the next ``seg_len``
    entries of that list (the days that precede it) and as label
    ``to_cate01`` of its own ``changepercent``.  Positions
    ``1 .. round(split * n_days)`` go to the validation set, the rest to the
    training set.

    The result ``((X_train, Y_train, ID_train), (X_valid, Y_valid, ID_valid))``
    is written to ``'<datatype>_seg<seg_len>.pkl'`` in the current directory;
    nothing is returned.

    :param split: fraction of each stock's days used for validation.
    :param seg_len: number of consecutive days per feature window.
    :param debug: restrict the query to two fixed codes and print the data.
    :param datatype: ``'cnn'`` wraps each window in an extra leading axis;
        the value is also used as the output file-name prefix.
    """
    db = Db()
    engine = db._get_engine()
    sql_stocklist = ("select * from trade_record where code in "
                     "(select code from trade_record where high<>0.0 and low <>0.0 "
                     "group by code having count(code)="
                     "(select count(distinct c_yearmonthday) from trade_record))")
    if debug:
        sql_stocklist += " and code in ('300138','002372')"
    df = pd.read_sql_query(sql_stocklist, engine)
    stockcodes = df['code'].unique()

    feature_cols = [
        'changepercent', 'trade', 'open', 'high', 'low', 'settlement',
        'volume', 'turnoverratio', 'amount', 'per', 'pb', 'mktcap', 'nmc',
        'deltat'
    ]
    X_train, Y_train, ID_train = [], [], []
    X_valid, Y_valid, ID_valid = [], [], []

    log.info('begin generate train data and validate data.')
    # time.clock() was removed in Python 3.8; time.time() reports elapsed
    # wall-clock seconds on every version.
    begin_time = time.time()
    k = 0
    for codes in stockcodes:
        temp_df = df[df.code == codes]
        # np.sort returns a sorted copy, so the frame itself is untouched.
        tradedaylist = np.sort(temp_df['c_yearmonthday'].values)[::-1]
        if len(tradedaylist) < seg_len:
            log.info('not enough trade days ,code is :%s', codes)
            continue
        validdays = np.round(split * len(tradedaylist))
        for i, day in enumerate(tradedaylist, 1):
            segdays = tradedaylist[i:i + seg_len]
            if len(segdays) < seg_len:
                # Ran out of earlier days to fill a full window.
                break
            # One feature row per window day (first matching row wins).
            SEG_X = [
                temp_df[temp_df.c_yearmonthday == segday][feature_cols].values[0]
                for segday in segdays
            ]
            if datatype == 'cnn':
                SEG_X = [SEG_X]
            data_tag = temp_df[temp_df.c_yearmonthday == day][[
                'code', 'name', 'changepercent'
            ]]
            temp_y = to_cate01(data_tag['changepercent'].values[0])
            temp_id = data_tag['code'].values[0]
            if i <= validdays:
                X_valid.append(SEG_X)
                ID_valid.append(temp_id)
                Y_valid.append(temp_y)
            else:
                X_train.append(SEG_X)
                ID_train.append(temp_id)
                Y_train.append(temp_y)
        k += 1
        if k % 500 == 0:
            log.info('%s stock finished ', k)

    train_set = (np.asarray(X_train), np.asarray(Y_train), np.asarray(ID_train))
    valid_set = (np.asarray(X_valid), np.asarray(Y_valid), np.asarray(ID_valid))
    log.info('generate data finished ,cost time:%s', time.time() - begin_time)
    log.info('X_train shape is :%s', train_set[0].shape)
    log.info('Y_train shape is :%s', train_set[1].shape)
    log.info('X_valid shape is :%s', valid_set[0].shape)
    log.info('Y_valid shape is :%s', valid_set[1].shape)
    if debug:
        # Formatted explicitly so both tuples are printed on Python 2 and 3.
        print('%s %s' % (train_set, valid_set))
        if X_train:
            print(np.asarray(X_train[0][0][0]))
    # Context manager guarantees the pickle file is flushed and closed.
    with open(datatype + '_seg' + str(seg_len) + '.pkl', 'wb') as pkl_file:
        pickle.dump((train_set, valid_set), pkl_file)
def get_hist6years(split=0.2,
                   seg_len=3,
                   debug=False,
                   datatype='cnn',
                   datafile=None,
                   predict_days=18):
    """Yield batches of windowed train/validation samples from ``trade_hist``.

    For every stock the trade days are sorted in descending order (newest
    first, assuming ``c_yearmonthday`` sorts chronologically).  For the day
    ``d1`` at 1-based position ``i``:

    * the label is ``to_cate01((close[d1] - close[d3]) / close[d3])`` where
      ``d3`` lies ``predict_days`` positions further down the list;
    * the features are the indicator rows of the ``seg_len`` days that follow
      ``d3`` in the list (i.e. precede it in sort order).

    Positions ``1 .. round(split * n_days)`` go to the validation set, the
    rest to the training set.

    :param split: fraction of each stock's days used for validation.
    :param seg_len: number of days per feature window.
    :param debug: restrict the query to three fixed stock codes.
    :param datatype: ``'cnn'`` wraps each window in an extra leading axis.
    :param datafile: unused; kept for signature compatibility.
    :param predict_days: gap, in trade days, between the label day and ``d3``.
    :return: generator of
        ``((X_train, Y_train, ID_train), (X_valid, Y_valid, ID_valid))``;
        one batch per 12 processed stocks plus a final batch with the
        remainder (which may be empty).
    """
    log = logger.log
    db = Db()
    engine = db._get_engine()
    sql_stocklist = ("select * from trade_hist where code in "
                     "(select code from trade_hist where high<>0.0 and low <>0.0 "
                     "group by code having count(code)>100)")
    if debug:
        sql_stocklist += " and code in ('002717','601888','002405')"
    df = pd.read_sql_query(sql_stocklist, engine)
    # Add technical indicators.
    df = add_volatility(df)
    stockcodes = df['code'].unique()
    # NOTE(review): other callers in this file pass only `df` to
    # get_technique -- confirm which signature is expected.
    df = get_technique(df, stockcodes)

    feature_cols = [
        'open', 'high', 'close', 'low', 'volume', 'price_change', 'p_change',
        'ma5', 'ma10', 'ma20', 'v_ma5', 'v_ma10', 'v_ma20', 'turnover',
        'deltat', 'BIAS_B', 'BIAS_S', 'BOLL_B', 'BOLL_S', 'CCI_B', 'CCI_S',
        'DMI_B', 'DMI_HL', 'DMI_IF1', 'DMI_IF2', 'DMI_MAX1', 'DMI_S', 'KDJ_B',
        'KDJ_S', 'KD_B', 'KD_S', 'MACD', 'MACD_B', 'MACD_DEA', 'MACD_DIFF',
        'MACD_EMA_12', 'MACD_EMA_26', 'MACD_EMA_9', 'MACD_S', 'MA_B', 'MA_S',
        'PSY_B', 'PSY_MYPSY1', 'PSY_S', 'ROC_B', 'ROC_S', 'RSI_B', 'RSI_S',
        'VR_B', 'VR_IF1', 'VR_IF2', 'VR_IF3', 'VR_S', 'XYYH_B', 'XYYH_B1',
        'XYYH_B2', 'XYYH_B3', 'XYYH_CC', 'XYYH_DD'
    ]
    tag_cols = ['code', 'name', 'p_change', 'close']
    samples = 12  # number of stocks per yielded batch

    X_train, Y_train, ID_train = [], [], []
    X_valid, Y_valid, ID_valid = [], [], []
    log.info('begin generate train data and validate data.')
    k = 0
    for codes in stockcodes:
        temp_df = df[df.code == codes]
        temp_df1 = temp_df.copy(deep=True)
        temp_df1 = temp_df1.sort_values(by='c_yearmonthday', ascending=1)
        tradedaylist = temp_df1['c_yearmonthday'].values
        tradedaylist.sort()
        tradedaylist = tradedaylist[::-1]
        # Date index enables the label-based window slice below.
        temp_df1 = temp_df1.set_index('c_yearmonthday')
        if len(tradedaylist) < seg_len:
            log.info('not enough trade days ,code is :%s', codes)
            continue
        validdays = np.round(split * len(tradedaylist))
        for i, d1 in enumerate(tradedaylist, 1):
            segdays = tradedaylist[i + predict_days:i + predict_days + seg_len]
            # Check the window length BEFORE indexing into it: an empty
            # window would otherwise raise IndexError instead of ending the
            # scan for this stock.
            if len(segdays) < seg_len or len(segdays) == 0:
                break
            segbegin = segdays[-1]  # earliest day of the window
            segend = segdays[0]  # latest day of the window
            data = temp_df1.loc[segbegin:segend, feature_cols].values
            if datatype == 'cnn':
                data = [data]
            d3 = tradedaylist[i + predict_days - 1]
            data_tag = temp_df[temp_df.c_yearmonthday == d1][tag_cols]
            data_tag3 = temp_df[temp_df.c_yearmonthday == d3][tag_cols]
            close_now = data_tag['close'].values[0]
            close_base = data_tag3['close'].values[0]
            # Relative close change from d3 to d1, then categorised.
            temp_y = to_cate01((close_now - close_base) / close_base)
            temp_id = data_tag['code'].values[0]
            if i <= validdays:
                X_valid.append(data)
                ID_valid.append(temp_id)
                Y_valid.append(temp_y)
            else:
                X_train.append(data)
                ID_train.append(temp_id)
                Y_train.append(temp_y)
        k += 1
        if k % samples == 0:
            print(k)
            log.info('%s stock finished ', k)
            yield ((np.asarray(X_train), np.asarray(Y_train),
                    np.asarray(ID_train)),
                   (np.asarray(X_valid), np.asarray(Y_valid),
                    np.asarray(ID_valid)))
            # Start a fresh batch.
            X_train, Y_train, ID_train = [], [], []
            X_valid, Y_valid, ID_valid = [], [], []
    # Whatever is left after the last full batch.
    yield ((np.asarray(X_train), np.asarray(Y_train), np.asarray(ID_train)),
           (np.asarray(X_valid), np.asarray(Y_valid), np.asarray(ID_valid)))
def get_today(split=0.2, seg_len=3, debug=False, datatype='cnn',
              datafile=None):
    """Build one prediction sample per stock from its latest ``seg_len`` days.

    Loads ``trade_hist`` (stocks with more than 100 rows and non-zero
    ``high``/``low``), adds the indicator columns through ``add_volatility``
    and ``get_technique``, and for each stock takes the indicator rows of the
    ``seg_len`` greatest ``c_yearmonthday`` values as its feature window.

    :param split: unused; kept for signature compatibility.
    :param seg_len: number of days per feature window.
    :param debug: restrict the query to three fixed stock codes.
    :param datatype: ``'cnn'`` wraps each window in an extra leading axis.
    :param datafile: unused; kept for signature compatibility.
    :return: ``(X_predict, ID_predict, NAME_predict)`` as numpy arrays.
    """
    log = logger.log
    db = Db()
    engine = db._get_engine()
    sql_stocklist = ("select * from trade_hist where code in "
                     "(select code from trade_hist where high<>0.0 and low <>0.0 "
                     "group by code having count(code)>100)")
    if debug:
        sql_stocklist += " and code in ('002717','601888','002405')"
    df = pd.read_sql_query(sql_stocklist, engine)
    df = add_volatility(df)
    stockcodes = df['code'].unique()
    df = get_technique(df)
    print(stockcodes)

    feature_cols = [
        'open', 'high', 'close', 'low', 'volume', 'price_change', 'p_change',
        'ma5', 'ma10', 'ma20', 'v_ma5', 'v_ma10', 'v_ma20', 'turnover',
        'deltat', 'BIAS_B', 'BIAS_S', 'BOLL_B', 'BOLL_S', 'CCI_B', 'CCI_S',
        'DMI_B', 'DMI_HL', 'DMI_IF1', 'DMI_IF2', 'DMI_MAX1', 'DMI_S', 'KDJ_B',
        'KDJ_S', 'KD_B', 'KD_S', 'MACD', 'MACD_B', 'MACD_DEA', 'MACD_DIFF',
        'MACD_EMA_12', 'MACD_EMA_26', 'MACD_EMA_9', 'MACD_S', 'MA_B', 'MA_S',
        'PSY_B', 'PSY_MYPSY1', 'PSY_S', 'ROC_B', 'ROC_S', 'RSI_B', 'RSI_S',
        'VR_B', 'VR_IF1', 'VR_IF2', 'VR_IF3', 'VR_S', 'XYYH_B', 'XYYH_B1',
        'XYYH_B2', 'XYYH_B3', 'XYYH_CC', 'XYYH_DD'
    ]
    X_predict = []
    ID_predict = []
    NAME_predict = []
    log.info('begin generate train data and validate data.')
    k = 0
    for codes in stockcodes:
        temp_df = df[df.code == codes]
        temp_df1 = temp_df.copy(deep=True)
        temp_df1 = temp_df1.sort_values(by='c_yearmonthday', ascending=1)
        tradedaylist = temp_df1['c_yearmonthday'].values
        tradedaylist.sort()
        tradedaylist = tradedaylist[::-1]
        # Date index enables the label-based window slice below.
        temp_df1 = temp_df1.set_index('c_yearmonthday')
        if len(tradedaylist) < seg_len:
            log.info('not enough trade days ,code is :%s', codes)
            continue
        # The guard above already guarantees a full window, so the former
        # second length check (whose `break` would have dropped every
        # remaining stock) is not needed.
        segdays = tradedaylist[:seg_len]
        segbegin = segdays[-1]  # earliest day of the window
        segend = segdays[0]  # latest day of the window
        data = temp_df1.loc[segbegin:segend, feature_cols].values
        if datatype == 'cnn':
            data = [data]
        data_tag = temp_df[temp_df.c_yearmonthday == tradedaylist[0]][[
            'code', 'name', 'p_change'
        ]]
        X_predict.append(data)
        ID_predict.append(data_tag['code'].values[0])
        NAME_predict.append(data_tag['name'].values[0])
        k += 1
        log.info('%s stock finished ', k)
    return (np.asarray(X_predict), np.asarray(ID_predict),
            np.asarray(NAME_predict))
def get_histdata(split=0.15, seg_len=3, debug=False, datatype='cnn'):
    """Build windowed train/validation samples from ``trade_record`` and pickle them.

    For every stock the trade days are sorted in descending order (newest
    first, assuming ``c_yearmonthday`` sorts chronologically).  The day at
    1-based position ``i`` gets as features the rows of the next ``seg_len``
    entries of that list (the days that precede it) and as label
    ``to_cate01`` of its own ``changepercent``.  Positions
    ``1 .. round(split * n_days)`` go to the validation set, the rest to the
    training set.

    The result ``((X_train, Y_train, ID_train), (X_valid, Y_valid, ID_valid))``
    is written to ``'<datatype>_seg<seg_len>.pkl'`` in the current directory;
    nothing is returned.

    :param split: fraction of each stock's days used for validation.
    :param seg_len: number of consecutive days per feature window.
    :param debug: restrict the query to two fixed codes and print the data.
    :param datatype: ``'cnn'`` wraps each window in an extra leading axis;
        the value is also used as the output file-name prefix.
    """
    db = Db()
    engine = db._get_engine()
    sql_stocklist = ("select * from trade_record where code in "
                     "(select code from trade_record where high<>0.0 and low <>0.0 "
                     "group by code having count(code)="
                     "(select count(distinct c_yearmonthday) from trade_record))")
    if debug:
        sql_stocklist += " and code in ('300138','002372')"
    df = pd.read_sql_query(sql_stocklist, engine)
    stockcodes = df['code'].unique()

    feature_cols = [
        'changepercent', 'trade', 'open', 'high', 'low', 'settlement',
        'volume', 'turnoverratio', 'amount', 'per', 'pb', 'mktcap', 'nmc',
        'deltat'
    ]
    X_train, Y_train, ID_train = [], [], []
    X_valid, Y_valid, ID_valid = [], [], []

    log.info('begin generate train data and validate data.')
    # time.clock() was removed in Python 3.8; time.time() reports elapsed
    # wall-clock seconds on every version.
    begin_time = time.time()
    k = 0
    for codes in stockcodes:
        temp_df = df[df.code == codes]
        # np.sort returns a sorted copy, so the frame itself is untouched.
        tradedaylist = np.sort(temp_df['c_yearmonthday'].values)[::-1]
        if len(tradedaylist) < seg_len:
            log.info('not enough trade days ,code is :%s', codes)
            continue
        validdays = np.round(split * len(tradedaylist))
        for i, day in enumerate(tradedaylist, 1):
            segdays = tradedaylist[i:i + seg_len]
            if len(segdays) < seg_len:
                # Ran out of earlier days to fill a full window.
                break
            # One feature row per window day (first matching row wins).
            SEG_X = [
                temp_df[temp_df.c_yearmonthday == segday][feature_cols].values[0]
                for segday in segdays
            ]
            if datatype == 'cnn':
                SEG_X = [SEG_X]
            data_tag = temp_df[temp_df.c_yearmonthday == day][[
                'code', 'name', 'changepercent'
            ]]
            temp_y = to_cate01(data_tag['changepercent'].values[0])
            temp_id = data_tag['code'].values[0]
            if i <= validdays:
                X_valid.append(SEG_X)
                ID_valid.append(temp_id)
                Y_valid.append(temp_y)
            else:
                X_train.append(SEG_X)
                ID_train.append(temp_id)
                Y_train.append(temp_y)
        k += 1
        if k % 500 == 0:
            log.info('%s stock finished ', k)

    train_set = (np.asarray(X_train), np.asarray(Y_train), np.asarray(ID_train))
    valid_set = (np.asarray(X_valid), np.asarray(Y_valid), np.asarray(ID_valid))
    log.info('generate data finished ,cost time:%s', time.time() - begin_time)
    log.info('X_train shape is :%s', train_set[0].shape)
    log.info('Y_train shape is :%s', train_set[1].shape)
    log.info('X_valid shape is :%s', valid_set[0].shape)
    log.info('Y_valid shape is :%s', valid_set[1].shape)
    if debug:
        # Formatted explicitly so both tuples are printed on Python 2 and 3.
        print('%s %s' % (train_set, valid_set))
        if X_train:
            print(np.asarray(X_train[0][0][0]))
    # Context manager guarantees the pickle file is flushed and closed.
    with open(datatype + '_seg' + str(seg_len) + '.pkl', 'wb') as pkl_file:
        pickle.dump((train_set, valid_set), pkl_file)
def get_hist6years(split=0.2,
                   seg_len=3,
                   debug=False,
                   datatype='cnn',
                   datafile=None,
                   predict_days=18):
    """Yield batches of windowed train/validation samples from ``trade_hist``.

    For every stock the trade days are sorted in descending order (newest
    first, assuming ``c_yearmonthday`` sorts chronologically).  For the day
    ``d1`` at 1-based position ``i``:

    * the label is ``to_cate01((close[d1] - close[d3]) / close[d3])`` where
      ``d3`` lies ``predict_days`` positions further down the list;
    * the features are the indicator rows of the ``seg_len`` days that follow
      ``d3`` in the list (i.e. precede it in sort order).

    Positions ``1 .. round(split * n_days)`` go to the validation set, the
    rest to the training set.

    :param split: fraction of each stock's days used for validation.
    :param seg_len: number of days per feature window.
    :param debug: restrict the query to three fixed stock codes.
    :param datatype: ``'cnn'`` wraps each window in an extra leading axis.
    :param datafile: unused; kept for signature compatibility.
    :param predict_days: gap, in trade days, between the label day and ``d3``.
    :return: generator of
        ``((X_train, Y_train, ID_train), (X_valid, Y_valid, ID_valid))``;
        one batch per 12 processed stocks plus a final batch with the
        remainder (which may be empty).
    """
    log = logger.log
    db = Db()
    engine = db._get_engine()
    sql_stocklist = ("select * from trade_hist where code in "
                     "(select code from trade_hist where high<>0.0 and low <>0.0 "
                     "group by code having count(code)>100)")
    if debug:
        sql_stocklist += " and code in ('002717','601888','002405')"
    df = pd.read_sql_query(sql_stocklist, engine)
    # Add technical indicators.
    df = add_volatility(df)
    stockcodes = df['code'].unique()
    # NOTE(review): other callers in this file pass only `df` to
    # get_technique -- confirm which signature is expected.
    df = get_technique(df, stockcodes)

    feature_cols = [
        'open', 'high', 'close', 'low', 'volume', 'price_change', 'p_change',
        'ma5', 'ma10', 'ma20', 'v_ma5', 'v_ma10', 'v_ma20', 'turnover',
        'deltat', 'BIAS_B', 'BIAS_S', 'BOLL_B', 'BOLL_S', 'CCI_B', 'CCI_S',
        'DMI_B', 'DMI_HL', 'DMI_IF1', 'DMI_IF2', 'DMI_MAX1', 'DMI_S', 'KDJ_B',
        'KDJ_S', 'KD_B', 'KD_S', 'MACD', 'MACD_B', 'MACD_DEA', 'MACD_DIFF',
        'MACD_EMA_12', 'MACD_EMA_26', 'MACD_EMA_9', 'MACD_S', 'MA_B', 'MA_S',
        'PSY_B', 'PSY_MYPSY1', 'PSY_S', 'ROC_B', 'ROC_S', 'RSI_B', 'RSI_S',
        'VR_B', 'VR_IF1', 'VR_IF2', 'VR_IF3', 'VR_S', 'XYYH_B', 'XYYH_B1',
        'XYYH_B2', 'XYYH_B3', 'XYYH_CC', 'XYYH_DD'
    ]
    tag_cols = ['code', 'name', 'p_change', 'close']
    samples = 12  # number of stocks per yielded batch

    X_train, Y_train, ID_train = [], [], []
    X_valid, Y_valid, ID_valid = [], [], []
    log.info('begin generate train data and validate data.')
    k = 0
    for codes in stockcodes:
        temp_df = df[df.code == codes]
        temp_df1 = temp_df.copy(deep=True)
        temp_df1 = temp_df1.sort_values(by='c_yearmonthday', ascending=1)
        tradedaylist = temp_df1['c_yearmonthday'].values
        tradedaylist.sort()
        tradedaylist = tradedaylist[::-1]
        # Date index enables the label-based window slice below.
        temp_df1 = temp_df1.set_index('c_yearmonthday')
        if len(tradedaylist) < seg_len:
            log.info('not enough trade days ,code is :%s', codes)
            continue
        validdays = np.round(split * len(tradedaylist))
        for i, d1 in enumerate(tradedaylist, 1):
            segdays = tradedaylist[i + predict_days:i + predict_days + seg_len]
            # Check the window length BEFORE indexing into it: an empty
            # window would otherwise raise IndexError instead of ending the
            # scan for this stock.
            if len(segdays) < seg_len or len(segdays) == 0:
                break
            segbegin = segdays[-1]  # earliest day of the window
            segend = segdays[0]  # latest day of the window
            data = temp_df1.loc[segbegin:segend, feature_cols].values
            if datatype == 'cnn':
                data = [data]
            d3 = tradedaylist[i + predict_days - 1]
            data_tag = temp_df[temp_df.c_yearmonthday == d1][tag_cols]
            data_tag3 = temp_df[temp_df.c_yearmonthday == d3][tag_cols]
            close_now = data_tag['close'].values[0]
            close_base = data_tag3['close'].values[0]
            # Relative close change from d3 to d1, then categorised.
            temp_y = to_cate01((close_now - close_base) / close_base)
            temp_id = data_tag['code'].values[0]
            if i <= validdays:
                X_valid.append(data)
                ID_valid.append(temp_id)
                Y_valid.append(temp_y)
            else:
                X_train.append(data)
                ID_train.append(temp_id)
                Y_train.append(temp_y)
        k += 1
        if k % samples == 0:
            print(k)
            log.info('%s stock finished ', k)
            yield ((np.asarray(X_train), np.asarray(Y_train),
                    np.asarray(ID_train)),
                   (np.asarray(X_valid), np.asarray(Y_valid),
                    np.asarray(ID_valid)))
            # Start a fresh batch.
            X_train, Y_train, ID_train = [], [], []
            X_valid, Y_valid, ID_valid = [], [], []
    # Whatever is left after the last full batch.
    yield ((np.asarray(X_train), np.asarray(Y_train), np.asarray(ID_train)),
           (np.asarray(X_valid), np.asarray(Y_valid), np.asarray(ID_valid)))
def get_today(split=0.2, seg_len=3, debug=False, datatype='cnn',
              datafile=None):
    """Build one prediction sample per stock from its latest ``seg_len`` days.

    Loads ``trade_hist`` (stocks with more than 100 rows and non-zero
    ``high``/``low``), adds the indicator columns through ``add_volatility``
    and ``get_technique``, and for each stock takes the indicator rows of the
    ``seg_len`` greatest ``c_yearmonthday`` values as its feature window.

    :param split: unused; kept for signature compatibility.
    :param seg_len: number of days per feature window.
    :param debug: restrict the query to three fixed stock codes.
    :param datatype: ``'cnn'`` wraps each window in an extra leading axis.
    :param datafile: unused; kept for signature compatibility.
    :return: ``(X_predict, ID_predict, NAME_predict)`` as numpy arrays.
    """
    log = logger.log
    db = Db()
    engine = db._get_engine()
    sql_stocklist = ("select * from trade_hist where code in "
                     "(select code from trade_hist where high<>0.0 and low <>0.0 "
                     "group by code having count(code)>100)")
    if debug:
        sql_stocklist += " and code in ('002717','601888','002405')"
    df = pd.read_sql_query(sql_stocklist, engine)
    df = add_volatility(df)
    stockcodes = df['code'].unique()
    df = get_technique(df)
    print(stockcodes)

    feature_cols = [
        'open', 'high', 'close', 'low', 'volume', 'price_change', 'p_change',
        'ma5', 'ma10', 'ma20', 'v_ma5', 'v_ma10', 'v_ma20', 'turnover',
        'deltat', 'BIAS_B', 'BIAS_S', 'BOLL_B', 'BOLL_S', 'CCI_B', 'CCI_S',
        'DMI_B', 'DMI_HL', 'DMI_IF1', 'DMI_IF2', 'DMI_MAX1', 'DMI_S', 'KDJ_B',
        'KDJ_S', 'KD_B', 'KD_S', 'MACD', 'MACD_B', 'MACD_DEA', 'MACD_DIFF',
        'MACD_EMA_12', 'MACD_EMA_26', 'MACD_EMA_9', 'MACD_S', 'MA_B', 'MA_S',
        'PSY_B', 'PSY_MYPSY1', 'PSY_S', 'ROC_B', 'ROC_S', 'RSI_B', 'RSI_S',
        'VR_B', 'VR_IF1', 'VR_IF2', 'VR_IF3', 'VR_S', 'XYYH_B', 'XYYH_B1',
        'XYYH_B2', 'XYYH_B3', 'XYYH_CC', 'XYYH_DD'
    ]
    X_predict = []
    ID_predict = []
    NAME_predict = []
    log.info('begin generate train data and validate data.')
    k = 0
    for codes in stockcodes:
        temp_df = df[df.code == codes]
        temp_df1 = temp_df.copy(deep=True)
        temp_df1 = temp_df1.sort_values(by='c_yearmonthday', ascending=1)
        tradedaylist = temp_df1['c_yearmonthday'].values
        tradedaylist.sort()
        tradedaylist = tradedaylist[::-1]
        # Date index enables the label-based window slice below.
        temp_df1 = temp_df1.set_index('c_yearmonthday')
        if len(tradedaylist) < seg_len:
            log.info('not enough trade days ,code is :%s', codes)
            continue
        # The guard above already guarantees a full window, so the former
        # second length check (whose `break` would have dropped every
        # remaining stock) is not needed.
        segdays = tradedaylist[:seg_len]
        segbegin = segdays[-1]  # earliest day of the window
        segend = segdays[0]  # latest day of the window
        data = temp_df1.loc[segbegin:segend, feature_cols].values
        if datatype == 'cnn':
            data = [data]
        data_tag = temp_df[temp_df.c_yearmonthday == tradedaylist[0]][[
            'code', 'name', 'p_change'
        ]]
        X_predict.append(data)
        ID_predict.append(data_tag['code'].values[0])
        NAME_predict.append(data_tag['name'].values[0])
        k += 1
        log.info('%s stock finished ', k)
    return (np.asarray(X_predict), np.asarray(ID_predict),
            np.asarray(NAME_predict))