def getTickDataAndFactorsByDateFromLocalFile(self, code, date, factors=TICKFACTORSUSED):
    """Build one day's combined tick-factor / tick-quote frame for a stock.

    The locally stored factor frames listed in ``factors`` are left-merged on
    their index, the tick quotes of the same day are merged in, every daily
    factor column (except ``date``, ``code`` and ``return``) is broadcast to
    all rows, and ``preClose`` / ``increaseToday`` are added.  Rows from the
    first tick whose ``B1`` or ``S1`` equals 0 onwards are dropped.

    Args:
        code: Stock code passed through to the data repositories.
        date: Trading day passed through to the data repositories.
        factors: Iterable of dicts; only the ``'factor'`` key is read here.

    Returns:
        The merged DataFrame; it is returned as-is (empty) when the merge
        with the tick quotes yields no rows.
    """
    factorRepo = factorBase()
    merged = pd.DataFrame()
    for entry in factors:
        factorFrame = factorRepo.getDataFromLocalFile(code, date, entry['factor'])
        if merged.shape[0] == 0:
            # Nothing has been collected yet: start from this factor's frame.
            merged = factorFrame.copy()
            continue
        if factorFrame.shape[0] != 0:
            merged = pd.merge(merged,
                              factorFrame,
                              how='left',
                              left_index=True,
                              right_index=True)

    tickFrame = TickDataProcess().getDataByDateFromLocalFile(code, date)
    merged = pd.merge(merged,
                      tickFrame,
                      how='left',
                      left_index=True,
                      right_index=True)
    if merged.shape[0] == 0:
        return merged

    # Broadcast the single daily row onto every tick row.
    dailyFrame = dailyFactorsProcess().getSingleStockDailyFactors(code, date, date)
    skippedColumns = ('date', 'code', 'return')
    for column in dailyFrame.columns:
        if column in skippedColumns:
            continue
        merged[column] = dailyFrame[column].iloc[0]

    klineFrame = KLineDataProcess('daily').getDataByDate(code, date, date)
    merged['preClose'] = klineFrame['preClose'].iloc[0]
    merged['increaseToday'] = merged['midPrice'] / merged['preClose'] - 1

    # NOTE(review): B1 == 0 or S1 == 0 presumably marks a limit-up/limit-down
    # state (the local was named "ceiling"); everything from the first such
    # tick onwards is discarded.
    limitRows = merged[(merged['B1'] == 0) | (merged['S1'] == 0)]
    if limitRows.shape[0] == 0:
        return merged
    firstLimitTick = limitRows['tick'].iloc[0]
    return merged[merged['tick'] < firstLimitTick]
def updateAllFactorsByCodeAndDate(self, code, date):
    """Compute every configured tick factor that is not yet stored locally.

    For each entry of ``TICKFACTORSNEEDTOUPDATE`` the factor class is imported
    dynamically and instantiated.  When its ``checkLocalFile`` reports the
    factor as missing, the day's tick data is loaded once (lazily) and handed
    to the instance's ``updateFactor``.  The method stops as soon as the tick
    data turns out to be empty.

    Args:
        code: Stock code; converted to ``str``.
        date: Trading day; converted to ``str``.
    """
    code, date = str(code), str(date)
    tickData = pd.DataFrame()
    logger.info(f'Compute factors of {code} in {date} start!')
    for spec in TICKFACTORSNEEDTOUPDATE:
        factorClass = getattr(importlib.import_module(spec['module']),
                              spec['class'])
        worker = factorClass()
        alreadyStored = worker.checkLocalFile(code, date, spec['factor'])
        if alreadyStored != False:
            continue
        if tickData.shape[0] == 0:
            # Tick data is shared by all factors, so it is read only once.
            tickData = TickDataProcess().getDataByDateFromLocalFile(code, date)
            if tickData.shape[0] == 0:
                logger.warning(f'There is no tickShots of {code} in {date}')
                return
        worker.updateFactor(code, date, tickData)
def updateFactor(self, code, date, data=None):
    """Compute this factor for one stock/day and store it via the parent class.

    Nothing is computed when the parent's ``checkLocalFile`` reports that the
    factor already exists; previously the "No need to compute!" message was
    logged but the factor was recomputed and written again anyway.

    Args:
        code: Stock code.
        date: Trading day.
        data: Tick data to compute from.  When ``None`` or empty, the tick
            shots are fetched with
            ``TickDataProcess().getTickShotDataFromInfluxdbServer``.
    """
    exists = super().checkLocalFile(code, date, self.factor)
    if exists == True:
        logger.info(
            f'No need to compute! {self.factor} of {code} in {date} exists!')
        return
    # ``None`` replaces the former ``pd.DataFrame()`` default, which was a
    # single object created at definition time and shared by all calls.
    if data is None or data.shape[0] == 0:
        data = TickDataProcess().getTickShotDataFromInfluxdbServer(code, date)
    result = self.computerFactor(code, date, data)
    super().updateFactor(code, date, self.factor, result)
def getLastTradedayTickData(self, code, date):
    """Return the tick shots of the most recent earlier day that has data.

    Trading days between ``getPreviousTradeday(date, 250)`` and ``date`` are
    scanned from newest to oldest; days that are not strictly before ``date``
    are skipped.  The first non-empty result is returned.

    Args:
        code: Stock code.
        date: Reference trading day; only days comparing ``< date`` are used.

    Returns:
        The first non-empty tick DataFrame found, otherwise an empty
        DataFrame (the last empty result fetched, or a fresh one when no day
        qualified).
    """
    windowStart = TradedayDataProcess.getPreviousTradeday(date, 250)
    tradedays = list(TradedayDataProcess.getTradedays(windowStart, date))
    latest = pd.DataFrame()
    for day in tradedays[::-1]:
        if not (day < date):
            continue
        latest = TickDataProcess().getTickShotDataFromInfluxdbServer(code, day)
        if latest.shape[0] > 0:
            break
    return latest
def saveAllFactorsByCodeAndDate(self, code, date):
    """Write one day's factor columns of a stock to ``<path>/<code>/<date>.h5``.

    The method returns without writing when the target file already exists,
    when the merged frame is empty, or when no rows remain after filtering.
    Only columns that are neither tick-quote columns nor listed in the
    exclusion list are stored, in sorted order.

    Args:
        code: Stock code; ``.`` is replaced by ``_`` for the directory name.
        date: Trading day; used as the file stem.
    """
    codeDir = os.path.join(self.path, str(code).replace('.', '_'))
    HDF5Utility.pathCreate(codeDir)
    fileName = os.path.join(codeDir, str(date) + '.h5')
    if HDF5Utility.fileCheck(fileName) == True:
        # The file already exists, so return directly.
        return

    # NOTE(review): this instance is never used below - confirm it can go.
    unusedFactorRepo = factorBase()

    # Fetch the tick factor data.
    frame = self.getFactorsUsedByDateFromLocalFile(code, date, self.factorsUsed)
    # Fetch the tick quote data and attach it.
    tickData = TickDataProcess().getDataByDateFromLocalFile(code, date)
    frame = pd.merge(frame,
                     tickData,
                     how='left',
                     left_index=True,
                     right_index=True)
    if frame.shape[0] == 0:
        return

    # Fetch the daily data.
    # NOTE(review): the daily factor frame is fetched but never read.
    unusedDailyData = dailyFactorsProcess().getSingleStockDailyFactors(
        code, date, date)
    klineFrame = KLineDataProcess('daily').getDataByDate(code, date, date)
    frame['preClose'] = klineFrame['preClose'].iloc[0]
    frame['increaseToday'] = frame['midPrice'] / frame['preClose'] - 1
    frame = frame[frame['tick'] < '145700000']

    # Drop the data after the price limit was hit (B1 or S1 equal to 0).
    limitRows = frame[(frame['B1'] == 0) | (frame['S1'] == 0)]
    if limitRows.shape[0] > 0:
        frame = frame[frame['tick'] < limitRows['tick'].iloc[0]]

    excludedColumns = [
        'preClose', 'buyVolume2', 'buyVolume5', 'buyVolume10', 'sellVolume2',
        'sellVolume5', 'sellVolume10'
    ]
    dropped = set(list(tickData.columns) + excludedColumns)
    keptColumns = sorted(set(frame.columns) - dropped)
    frame = frame[keptColumns]
    if frame.shape[0] == 0:
        return

    try:
        logger.info(f'Recording factors of {code} in {date}!')
        with pd.HDFStore(fileName,
                         'a',
                         complib='blosc:zstd',
                         append=True,
                         complevel=9) as store:
            store.append('data',
                         frame,
                         append=True,
                         format="table",
                         data_columns=frame.columns)
    except Exception as excp:
        logger.error(f'{fileName} error! {excp}')
def saveAllFactorsToInfluxdbByCodeAndDay(self, code, date):
    """Compute all configured tick factors for one stock/day and store them
    in InfluxDB.

    Every factor class listed in ``TICKFACTORSNEEDTOUPDATE`` is imported and
    its ``computerFactor`` is run on the day's tick shots (fetched once).
    Non-empty factor frames are left-merged on the index, a fixed set of tick
    columns is attached, ``increaseToday`` is added, and rows at or after tick
    ``'145700000'`` or from the first tick with ``B1 == 0`` / ``S1 == 0``
    onwards are dropped before writing.

    Nothing is written when there are no tick shots, when
    ``highLimit / dailyPreClose - 1 < 0.06`` on the first row, when no factor
    produced rows, or when filtering removes every row.

    Args:
        code: Stock code; converted to ``str`` and used as the measurement.
        date: Trading day; converted to ``str``.
    """
    code = str(code)
    date = str(date)
    database = INFLUXDBTICKFACTORDATABASE
    measurement = code
    tag = {}
    mydata = pd.DataFrame()
    data = pd.DataFrame()
    for factor in TICKFACTORSNEEDTOUPDATE:
        mymodule = importlib.import_module(factor['module'])
        myinstance = getattr(mymodule, factor['class'])()
        if data.shape[0] == 0:
            data = TickDataProcess().getTickShotDataFromInfluxdbServer(
                code, date)
            if data.shape[0] == 0:
                return
            highLimit = data.iloc[0]['highLimit']
            preClose = data.iloc[0]['dailyPreClose']
            # NOTE(review): the original (disabled) log message labels this
            # case as an ST stock; such days are skipped entirely.
            if (highLimit / preClose - 1) < 0.06:
                return
        factorData = myinstance.computerFactor(code, date, data)
        if factorData.shape[0] > 0:
            if mydata.shape[0] == 0:
                mydata = factorData
            else:
                mydata = pd.merge(mydata,
                                  factorData,
                                  how='left',
                                  left_index=True,
                                  right_index=True)

    if mydata.shape[0] == 0:
        # No factor rows at all (or no factor configured).  Returning here
        # also avoids indexing tick columns on a still-empty ``data`` frame.
        return

    # Merge in the tick quote data.
    mydata = pd.merge(mydata,
                      data[[
                          'code', 'date', 'tick', 'midPrice', 'realData',
                          'dailyPreClose', 'dailyOpen', 'B1', 'S1', 'BV1',
                          'SV1'
                      ]],
                      how='left',
                      left_index=True,
                      right_index=True)
    mydata['increaseToday'] = mydata['midPrice'] / mydata['dailyPreClose'] - 1
    mydata = mydata[mydata['tick'] < '145700000']

    # Drop the data after the price limit was hit (B1 or S1 equal to 0).
    ceiling = mydata[(mydata['B1'] == 0) | (mydata['S1'] == 0)]
    if ceiling.shape[0] > 0:
        ceilingTime = ceiling['tick'].iloc[0]
        mydata = mydata[mydata['tick'] < ceilingTime]
    if mydata.shape[0] == 0:
        return

    try:
        logger.info(f'Recording factors to influxdb of {code} in {date}!')
        InfluxdbUtility.saveDataFrameDataToInfluxdb(mydata, database,
                                                    measurement, tag)
    except Exception as excp:
        # Still best-effort, but a failed write is no longer silent.
        logger.error(
            f'Recording factors to influxdb of {code} in {date} error! {excp}')
def singleCode(self, code: str, startDate: str, endDate: str, parameters=[]):
    """Run the linear max/min predictor for one stock over a date range.

    For every trading day with tick shots, the tick factors, tick quotes and
    daily fields are merged, the 19 features are selected through
    ``self.dataSelect(A, 0.2)`` and combined with fixed linear weights into
    ``maxPredict`` / ``minPredict`` (smoothed with an EWM of span 2).  The
    correlation of the predictions with the next-5-minute labels is printed.

    NOTE(review): the strategy call is disabled in this version, so no trades
    are ever collected and an empty DataFrame is returned.  Previously the
    fee/cash columns were computed on that empty frame, which raised
    ``KeyError`` on ``'price'``.

    Args:
        code: Stock code.
        startDate: First day of the range passed to ``getTradedays``.
        endDate: Last day of the range passed to ``getTradedays``.
        parameters: Ignored; it is overwritten inside the day loop.

    Returns:
        DataFrame of trades with ``code``, ``fee``, ``cashChange`` and
        ``amount`` columns added, or an empty DataFrame when there are none.
    """
    days = list(TradedayDataProcess().getTradedays(startDate, endDate))
    tickFactors = tickFactorsProcess()
    tick = TickDataProcess()
    daily = dailyFactorsProcess()
    dailyKLine = KLineDataProcess('daily')

    # Loop-invariant model definition, built once instead of once per day.
    features = [
        'buyForce', 'sellForce', 'buySellForceChange', 'buySellSpread',
        'differenceHighLow3m', 'midToVwap', 'midToVwap3m',
        'midPrice3mIncrease', 'midPriceBV3m', 'midInPrevious3m', 'midStd60',
        'increaseToday', 'closeStd20', 'buySellVolumeRatio2',
        'buySellWeightedVolumeRatio2', 'buySellVolumeRatio5',
        'buySellWeightedVolumeRatio5', 'buySellVolumeRatio10',
        'buySellWeightedVolumeRatio10'
    ]
    maxWeight = np.array([
        0.03218688, -0.0121024, -0.00970715, 0.48172206, 0.42610642,
        0.10048948, -0.05574053, 0.08212702, -0.12357012, -0.00123216,
        0.09529259, 0.00509518, 0.14970625, -0.00291313, 0.00402094,
        -0.00452788, 0.00286216, 0.0020172, -0.00235546
    ])
    minWeight = np.array([
        -0.00385887, -0.01163938, 0.0043455, -0.01114819, -0.34286923,
        0.08314041, 0.00154458, 0.12249813, -0.02194375, -0.00038749,
        -0.02217015, 0.00610296, -0.09264385, -0.0020065, 0.00249547,
        -0.00324293, 0.00501176, 0.00389697, -0.00294958
    ])
    maxIntercept = 0.00079871
    minIntercept = -0.00155935

    trade = []
    for day in days:
        tickData = tick.getTickShotDataFromInfluxdbServer(code, day)
        if tickData.shape[0] == 0:
            continue
        data = tickFactors.getTickFactorsOnlyByDateFromLocalFile(code, day)
        data = pd.merge(data,
                        tickData,
                        how='left',
                        left_index=True,
                        right_index=True)
        # Broadcast the single daily row onto every tick row.
        dailyData = daily.getSingleStockDailyFactors(code, day, day)
        for col in dailyData.columns:
            if col not in ['date', 'code']:
                data[col] = dailyData[col].iloc[0]
        dailyKLineData = dailyKLine.getDataByDate(code, day, day)
        data['preClose'] = dailyKLineData['preClose'].iloc[0]
        data['increaseToday'] = data['midPrice'] / data['preClose'] - 1
        if np.isnan(data['weight300'].iloc[0]):
            continue
        maxPosition = round(
            data['weight300'].iloc[0] * 100000000 / data['preClose'].iloc[0] /
            100, -2)

        A = self.dataSelect(data[features], 0.2).values
        data['maxPredict'] = A.dot(maxWeight) + maxIntercept
        data['minPredict'] = A.dot(minWeight) + minIntercept
        data['maxPredict'] = data['maxPredict'].ewm(span=2,
                                                    ignore_na=True,
                                                    adjust=True).mean()
        data['minPredict'] = data['minPredict'].ewm(span=2,
                                                    ignore_na=True,
                                                    adjust=True).mean()
        data['midPredict'] = (data['maxPredict'] + data['minPredict']) / 2
        m = data[[
            'midIncreaseMinNext5m', 'midIncreaseMaxNext5m', 'maxPredict',
            'minPredict', 'midPredict'
        ]]
        print(m.corr())

        mycolumns = list(tickData.columns)
        mycolumns.append('maxPredict')
        mycolumns.append('minPredict')
        data = data[mycolumns]
        parameters = {
            'maxPosition': maxPosition,
            'longOpen': 0.015,
            'shortOpen': -0.015,
            'longClose': 0.01,
            'shortClose': -0.01,
            'transactionRatio': 0.2
        }
        # NOTE(review): strategy execution is disabled here in the original:
        # trade.append(self.strategy(data, parameters))

    if len(trade) == 0:
        return pd.DataFrame()
    trade = pd.concat(trade)
    if trade.shape[0] == 0:
        return pd.DataFrame()

    trade['code'] = code
    trade['fee'] = trade['price'] * 0.0001
    selectBuy = trade['direction'] == 'buy'
    selectSell = trade['direction'] == 'sell'
    # Sells are charged an additional 0.001 of the price on top of the fee.
    trade.loc[selectSell,
              'fee'] = (trade['fee'] + trade['price'] * 0.001)[selectSell]
    trade.loc[selectBuy, 'cashChange'] = (
        (-trade['price'] - trade['fee']) * trade['volume'])[selectBuy]
    trade.loc[selectSell, 'cashChange'] = (
        (trade['price'] - trade['fee']) * trade['volume'])[selectSell]
    trade['amount'] = trade['price'] * trade['volume']
    return trade
def singleCode(self, code: str, startDate: str, endDate: str, parameters=[]):
    """Run the Keras max/min predictor and the strategy for one stock.

    For every trading day with local tick data, the tick factors, tick quotes
    and daily fields are merged, the 19 features are selected through
    ``self.dataSelect(A, 0.2)`` and fed to the model stored at
    ``<LocalFileAddress>/tmp/dnn001_midIncreaseNext5m.h5``.  Column 0 of the
    prediction is used as ``minPredict`` and column 1 as ``maxPredict`` (both
    smoothed with an EWM of span 2).  ``self.strategy`` is then run on the
    result and its trades are collected.

    The model is loaded lazily on the first day that needs it and reused for
    later days; previously it was reloaded from disk for every day.

    Args:
        code: Stock code.
        startDate: First day of the range passed to ``getTradedays``.
        endDate: Last day of the range passed to ``getTradedays``.
        parameters: Ignored; it is overwritten inside the day loop.

    Returns:
        DataFrame of trades with ``code``, ``fee``, ``cashChange`` and
        ``amount`` columns added, or an empty DataFrame when there are none.
    """
    days = list(TradedayDataProcess().getTradedays(startDate, endDate))
    tickFactors = tickFactorsProcess()
    tick = TickDataProcess()
    daily = dailyFactorsProcess()
    dailyKLine = KLineDataProcess('daily')
    modelPath = os.path.join(LocalFileAddress, 'tmp',
                             'dnn001_midIncreaseNext5m.h5')

    # Loop-invariant feature list, built once instead of once per day.
    features = [
        'buyForce', 'sellForce', 'buySellForceChange', 'buySellSpread',
        'differenceHighLow3m', 'midToVwap', 'midToVwap3m',
        'midPrice3mIncrease', 'midPriceBV3m', 'midInPrevious3m', 'midStd60',
        'increaseToday', 'closeStd20', 'buySellVolumeRatio2',
        'buySellWeightedVolumeRatio2', 'buySellVolumeRatio5',
        'buySellWeightedVolumeRatio5', 'buySellVolumeRatio10',
        'buySellWeightedVolumeRatio10'
    ]
    model = None  # loaded on first use so that empty ranges never touch disk

    trade = []
    for day in days:
        tickData = tick.getDataByDateFromLocalFile(code, day)
        if tickData.shape[0] == 0:
            continue
        data = tickFactors.getTickFactorsOnlyByDateFromLocalFile(code, day)
        data = pd.merge(data,
                        tickData,
                        how='left',
                        left_index=True,
                        right_index=True)
        # Broadcast the single daily row onto every tick row.
        dailyData = daily.getSingleStockDailyFactors(code, day, day)
        for col in dailyData.columns:
            if col not in ['date', 'code']:
                data[col] = dailyData[col].iloc[0]
        dailyKLineData = dailyKLine.getDataByDate(code, day, day)
        data['preClose'] = dailyKLineData['preClose'].iloc[0]
        data['increaseToday'] = data['midPrice'] / data['preClose'] - 1
        if np.isnan(data['weight300'].iloc[0]):
            continue
        maxPosition = round(
            data['weight300'].iloc[0] * 100000000 / data['preClose'].iloc[0] /
            100, -2)

        A = self.dataSelect(data[features], 0.2).values
        warnings.filterwarnings('ignore')
        if model is None:
            model = keras.models.load_model(modelPath)
        predictArray = model.predict(A, verbose=0)
        data['maxPredict'] = predictArray[:, 1]
        data['minPredict'] = predictArray[:, 0]
        data['maxPredict'] = data['maxPredict'].ewm(span=2,
                                                    ignore_na=True,
                                                    adjust=True).mean()
        data['minPredict'] = data['minPredict'].ewm(span=2,
                                                    ignore_na=True,
                                                    adjust=True).mean()
        data['midPredict'] = (data['maxPredict'] + data['minPredict']) / 2
        # NOTE(review): ``m`` and the display option only served a disabled
        # debug print; kept because the selection still requires the label
        # columns to exist and the option is a process-wide setting.
        m = data[[
            'midIncreaseMinNext5m', 'midIncreaseMaxNext5m', 'maxPredict',
            'minPredict', 'midPredict'
        ]]
        pd.set_option('display.max_rows', None)

        mycolumns = list(tickData.columns)
        mycolumns.extend([
            'maxPredict', 'minPredict', 'midPredict', 'increaseToday',
            'midInPrevious3m'
        ])
        data = data[mycolumns]
        parameters = {
            'maxPosition': maxPosition,
            'longOpen': 0.015,
            'shortOpen': -0.015,
            'longClose': 0.01,
            'shortClose': -0.01,
            'transactionRatio': 0.1
        }
        trade.append(self.strategy(data, parameters))

    if len(trade) == 0:
        return pd.DataFrame()
    trade = pd.concat(trade)
    if trade.shape[0] == 0:
        return pd.DataFrame()

    trade['code'] = code
    trade['fee'] = trade['price'] * 0.0001
    selectBuy = trade['direction'] == 'buy'
    selectSell = trade['direction'] == 'sell'
    # Sells are charged an additional 0.001 of the price on top of the fee.
    trade.loc[selectSell,
              'fee'] = (trade['fee'] + trade['price'] * 0.001)[selectSell]
    trade.loc[selectBuy, 'cashChange'] = (
        (-trade['price'] - trade['fee']) * trade['volume'])[selectBuy]
    trade.loc[selectSell, 'cashChange'] = (
        (trade['price'] - trade['fee']) * trade['volume'])[selectSell]
    trade['amount'] = trade['price'] * trade['volume']
    return trade