def gen1minKData(self, vtSymbol, df_data):
    """Build the 1-minute bars of ``vtSymbol`` and write them to WIND_1_MIN_DB.

    Consecutive entries of ``self.splitDict[vtSymbol][1]`` are used as
    ``[start, end)`` windows.  When the minute just before ``start`` is listed
    in ``self.AucTime`` the window is widened to include that minute.  Only
    windows holding more than two rows produce a bar (via ``self.aggMethod``).

    :param vtSymbol: key used for ``self.barDict`` / ``self.splitDict`` and
        passed to the database insert.
    :param df_data: frame that is filtered on its ``structTime`` column.
    """
    try:
        gLogger.info("start gen1minKData , vtSymbol is %s" % vtSymbol)
        cycleKey = 1
        bars = []
        self.barDict[vtSymbol] = {cycleKey: bars}
        oneMinute = datetime.timedelta(minutes=1)

        # NOTE(review): the column is created on self.df but read from
        # df_data below -- this only works if the caller passes self.df (or a
        # frame that already carries "structTime"); confirm against callers.
        self.df["structTime"] = self.df["time"].map(
            lambda raw: datetime.datetime.strptime(raw, "%H%M%S%f"))

        edges = self.splitDict[vtSymbol][cycleKey]
        for left, right in zip(edges, edges[1:]):
            start = datetime.datetime.strptime(str(left).strip(), '%H:%M:%S')
            end = datetime.datetime.strptime(str(right).strip(), '%H:%M:%S')

            # Pull the preceding auction minute into this window.
            shifted = start - oneMinute
            if shifted.strftime('%H:%M') in self.AucTime:
                start = shifted

            inWindow = (df_data["structTime"] >= start) & (
                df_data["structTime"] < end)
            window = df_data.loc[inWindow]
            if len(window) > 2:
                bars.append(self.aggMethod(window))

        dbNew = self.db.get_db("localhost", 27017, 'WIND_1_MIN_DB')
        self.db.insert2db(dbNew, vtSymbol, self.barDict[vtSymbol][cycleKey])
    except Exception as e:
        gLogger.exception("Exception : %s" % e)
def delItemsFromRemove(self):
    """Drop from ``self.df`` every row whose label is in ``self.removeList``.

    Labels are de-duplicated first; ``self.df`` is rebound to the result.
    Any exception is logged and swallowed.
    """
    try:
        gLogger.info("start delItemsFromRemove")
        uniqueLabels = list(set(self.removeList))
        remaining = self.df.drop(uniqueLabels, axis=0)
        self.df = remaining
    except Exception as e:
        gLogger.exception("Exception : %s" % e)
def convert2df(self):
    """Turn the loaded record in ``self.data['temp'][0][0]`` into a DataFrame.

    Each array of the record becomes one column, named positionally from
    ``colNames``.  For the four level-1 quote columns only the first column
    of the array is kept; other multi-dimensional arrays are flattened.
    Rows containing any NaN are dropped and the frame is then passed through
    ``self.normalizeData``.

    :return: the normalised DataFrame, or ``None`` when an exception was
        logged.
    """
    try:
        colNames = [
            'vtSymbol', 'symbol', 'date', 'time', 'lastPrice', 'lastVolume',
            'lastTurnover', 'matchItems', 'openInterest', 'tradeFlag',
            'bsFlag', 'volume', 'turnover', 'highPrice', 'lowPrice',
            'openPrice', 'preClosePrice', 'settlementPrice', 'position',
            'curDelta', 'preSettlementPrice', 'prePosition', 'askPrice1',
            'askVolume1', 'bidPrice1', 'bidVolume1', 'askAvPrice',
            'bidAvPrice', 'totalAskVolume', 'totalBidVolume', 'index',
            'stocks', 'ups', 'downs', 'holdLines'
        ]
        firstLevelOnly = ('askPrice1', 'askVolume1', 'bidPrice1',
                          'bidVolume1')

        # Collect the columns in an ordinary list.  The previous version
        # stored them by writing into the locals() mapping, which the Python
        # documentation says must not be relied upon.
        seriesList = []
        for k, v in enumerate(self.data['temp'][0][0].tolist()):
            name = colNames[k]
            if name in firstLevelOnly:
                v = v[:, 0]
            if v.ndim == 1:
                seriesList.append(pd.Series(v, name=name))
            elif v.ndim > 1:
                seriesList.append(pd.Series(v.ravel(), name=name))
            else:
                # 0-dimensional entries cannot form a column; report and skip.
                print("index = %d, dim = %d" % (k, v.ndim))

        df_data = pd.concat(seriesList, axis=1)
        df_data = df_data.dropna(axis=0, how='any')
        return self.normalizeData(df_data)
    except Exception as e:
        gLogger.exception("Exception when convert to df: %s" % e)
def genKData(self, vtSymbol, df_data):
    """Generate every bar cycle for ``vtSymbol`` from ``df_data``.

    Runs, in order, ``gen1minKData``, ``genOtherKData`` (with every entry of
    ``self.cycle`` except the first) and ``gen1DayKData``.  Nothing is
    generated when ``df_data`` is empty; that case is only logged.

    :param vtSymbol: symbol key forwarded to the generators.
    :param df_data: DataFrame forwarded to ``gen1minKData``.
    """
    if df_data.empty:
        # No exception is active here, so use error() rather than
        # exception(), which would append a meaningless empty traceback.
        gLogger.error("df data is empty!")
        return

    cycle = self.cycle[1:]
    self.gen1minKData(vtSymbol, df_data)
    self.genOtherKData(vtSymbol, cycle)
    self.gen1DayKData(vtSymbol)
def cleanSameTimestamp(self):
    """Record rows with a repeated timestamp for removal.

    ``self.df`` is sorted by ``datetime`` in descending order; every row
    whose ``datetime`` has already appeared in that order has its index label
    appended to ``self.removeList``.
    """
    try:
        gLogger.info("start cleanSameTimestamp")
        ordered = self.df.sort_values(by=['datetime'], ascending=False)
        repeated = ordered["datetime"].duplicated()
        duplicateLabels = ordered[repeated].index.values
        for label in duplicateLabels:
            self.removeList.append(label)
            gLogger.debug('remove index = %d' % label)
    except Exception as e:
        gLogger.exception("Exception : %s" % e)
def cleanIllegalTradingTime(self):
    """Record rows that fall outside the trading sessions for removal.

    A temporary ``illegalTime`` column is filled by mapping
    ``self.StandardizeTimePeriod`` over ``time`` (missing results become
    ``False``).  Labels of rows where it is ``False`` are appended to
    ``self.removeList``; the temporary column is deleted afterwards.
    """
    try:
        gLogger.info("start cleanIllegalTradingTime ")
        flags = self.df["time"].map(self.StandardizeTimePeriod)
        self.df['illegalTime'] = flags
        self.df['illegalTime'] = self.df['illegalTime'].fillna(False)

        outsideSession = self.df[self.df['illegalTime'] == False]
        for label in outsideSession.index:
            self.removeList.append(label)
            gLogger.debug('remove index = %d' % (label))

        del self.df["illegalTime"]
    except Exception as e:
        gLogger.exception("Exception: %s" % e)
def compare_time(self, s1, s2, st, ms):
    """Tell whether ``st`` (plus milliseconds ``ms``) lies in ``[s1, s2]``.

    ``struct_time`` carries no milliseconds, so ``ms`` is checked separately.
    An end bound equal to ``00:00`` is treated as ``23:59:61``.  A time equal
    to the end bound only counts when ``ms`` is zero.

    :param s1: session start as ``time.struct_time``.
    :param s2: session end as ``time.struct_time``.
    :param st: the time under test as ``time.struct_time``.
    :param ms: millisecond part, anything ``int()`` accepts.
    :return: ``True`` / ``False``; ``None`` when an exception was logged.
    """
    try:
        midnight = time.strptime('00:00', '%H:%M')
        if s2 == midnight:
            s2 = time.strptime('23:59:61', '%H:%M:%S')

        # Strictly inside the session: milliseconds are irrelevant.
        if s1 < st < s2:
            return True

        onStart = st == s1 and int(ms) >= 0
        if onStart:
            return True
        onEnd = st == s2 and int(ms) == 0
        return bool(onEnd)
    except Exception as e:
        gLogger.exception("Exception when compare_time e = %s" % e)
def gen1DayKData(self, vtSymbol):
    """Aggregate the stored 1-minute bars into a single '1Day' entry.

    The bars in ``self.barDict[vtSymbol][1]`` are loaded into a DataFrame
    and, when that frame is not empty, passed to ``self.aggMethod``.  The
    resulting list is written to the ``WIND_1Day_MIN_DB`` database.

    :param vtSymbol: key into ``self.barDict`` and collection identifier
        handed to ``insert2db``.
    """
    try:
        gLogger.info("start gen1DayKData , vtSymbol = %s" % vtSymbol)
        c = '1Day'
        dayBars = []
        self.barDict[vtSymbol][c] = dayBars

        minuteFrame = pd.DataFrame(self.barDict[vtSymbol][1])
        if not minuteFrame.empty:
            dayBars.append(self.aggMethod(minuteFrame))

        dbName = 'WIND_' + str(c) + '_MIN_DB'
        dbNew = self.db.get_db("localhost", 27017, dbName)
        self.db.insert2db(dbNew, vtSymbol, self.barDict[vtSymbol][c])
    except Exception as e:
        gLogger.exception("Exception : %s" % e)
def estimateExceptional(self, field):
    """Log rows whose ``field`` jumps by at least 12% of the previous row.

    For each row the absolute difference to the previous row's value is
    compared with ``previous * 0.12``.  Labels of rows meeting the threshold
    are appended to ``self.logList`` unless they are already in
    ``self.removeList``.

    :param field: name of the column of ``self.df`` to inspect.
    """
    try:
        gLogger.info("start estimateExceptional, field = %s" % field)
        work = pd.DataFrame(self.df[field])
        work["shift"] = self.df[field].shift(1)
        work["delta"] = abs(work[field] - work["shift"])
        # The first row has no predecessor and is discarded here.
        work = work.dropna(axis=0, how='any')
        work["IsExcept"] = work["delta"] >= work["shift"] * 0.12

        suspicious = work.loc[work["IsExcept"]]
        for label in suspicious.index:
            if label in self.removeList:
                continue
            self.logList.append(label)
            gLogger.debug('Field = %s, log index = %d' % (field, label))
    except Exception as e:
        gLogger.exception("Exception : %s" % e)
def paddingWithPrevious(self, field):
    """Replace zero values of ``field`` with the value of the row before.

    Rows already listed in ``self.removeList`` are skipped, as are rows
    whose label minus one would be negative.  Each patched label is appended
    to ``self.updateList``.

    :param field: name of the column of ``self.df`` to patch.
    """
    try:
        gLogger.info("start paddingWithPrevious, field = %s" % field)
        zeroRows = self.df.loc[self.df[field] == 0.0]
        for i, row in zeroRows.iterrows():
            if i in self.removeList:
                continue
            # "Previous" is label arithmetic: the row labelled i - 1.
            preIndex = i - 1
            if preIndex < 0:
                continue
            row[field] = self.df.loc[preIndex, field]
            self.df.loc[i, field] = row[field]
            self.updateList.append(i)
            gLogger.debug('Field = %s, update index = %d' % (field, i))
    except Exception as e:
        gLogger.exception("Exception : %s" % e)
def getTimeList(self, cycle):
    """Regenerate and persist the split-time list for ``self.Symbol``.

    Creates ``self.timeFilePath`` when it does not exist, then calls
    ``self.genTimeList`` followed by ``self.saveTimeList``.

    :param cycle: forwarded unchanged to ``genTimeList``.
    """
    try:
        gLogger.info("start getTimeList")
        targetDir = self.timeFilePath
        if not os.path.exists(targetDir):
            os.makedirs(targetDir)

        # A same-day pickle cache ('timeSeries_<Symbol>.pickle' under
        # timeFilePath) used to be loaded at this point; that branch is
        # disabled, so the list is always rebuilt.
        self.genTimeList(self.Symbol, cycle)
        self.saveTimeList()
    except Exception as e:
        gLogger.exception("Exception : %s" % e)
def aggMethod(self, dfTemp):
    """Collapse the rows of ``dfTemp`` into one bar dictionary.

    Two input layouts are accepted:

    * tick rows, recognised by a ``lastPrice`` column -- prices come from
      ``lastPrice``, volume/turnover are the sums of ``lastVolume`` and
      ``lastTurnover``;
    * rows that are themselves bars produced by this method (columns
      ``open``/``high``/``low``/``close``/``volume``/``turnover``), which is
      what ``genOtherKData`` and ``gen1DayKData`` pass in.  Previously these
      frames raised ``KeyError`` on ``lastPrice`` and ``None`` was returned.

    Identification fields (``vtSymbol``, ``symbol``, ``date``, ``time``,
    ``datetime``) are taken from the first row, ``openInterest`` from the
    last row.

    :param dfTemp: DataFrame of ticks or of bars, in chronological order.
    :return: dict describing the bar, or ``None`` when an exception was
        logged (for instance on an empty frame).
    """
    try:
        first = dfTemp.iloc[0]
        last = dfTemp.iloc[-1]

        if "lastPrice" in dfTemp.columns:
            # Tick layout.
            openPrice = first["lastPrice"]
            closePrice = last["lastPrice"]
            highPrice = dfTemp["lastPrice"].max()
            lowPrice = dfTemp["lastPrice"].min()
            volume = dfTemp["lastVolume"].sum()
            turnover = dfTemp["lastTurnover"].sum()
        else:
            # Bar layout: combine already aggregated bars.
            openPrice = first["open"]
            closePrice = last["close"]
            highPrice = dfTemp["high"].max()
            lowPrice = dfTemp["low"].min()
            volume = dfTemp["volume"].sum()
            turnover = dfTemp["turnover"].sum()

        tempBar = {}
        tempBar["vtSymbol"] = first["vtSymbol"]
        tempBar["symbol"] = first["symbol"]
        tempBar["date"] = first["date"]
        tempBar["time"] = first["time"]
        tempBar["openInterest"] = float(last["openInterest"])
        tempBar["volume"] = float(volume)
        tempBar["turnover"] = float(turnover)
        tempBar["high"] = float(highPrice)
        tempBar["low"] = float(lowPrice)
        tempBar["open"] = float(openPrice)
        tempBar["close"] = float(closePrice)
        tempBar["datetime"] = first["datetime"]
        return tempBar
    except Exception as e:
        gLogger.exception("Exception when exec aggMethod e:%s" % e)
def StandardizeTimePeriod(self, target):
    """Check whether tick time ``target`` falls inside a trading session.

    The sessions come from the ``CurrPeriod`` entry of ``self.dfInfo`` for
    ``self.Symbol``: a comma-separated list of ``HH:MM-HH:MM`` ranges.  The
    last three characters of ``target`` are taken as milliseconds, the rest
    is parsed as ``%H%M%S``; the decision per session is delegated to
    ``self.compare_time``.

    :param target: time string, ``HHMMSS`` followed by three ms digits.
    :return: ``True`` when a session matches, otherwise ``None`` (also after
        a logged exception).
    """
    tar = target
    try:
        periodSpec = self.dfInfo.loc[self.Symbol]["CurrPeriod"]
        bounds = []
        for span in periodSpec.split(','):
            bounds.extend(span.split('-'))

        ms = tar[-3:]
        tar = tar[:-3]
        tar = time.strptime(tar, '%H%M%S')

        # bounds alternates start, end, start, end, ...
        for rawStart, rawEnd in zip(bounds[0::2], bounds[1::2]):
            start = time.strptime(str(rawStart).strip(), '%H:%M')
            end = time.strptime(str(rawEnd).strip(), '%H:%M')
            if self.compare_time(start, end, tar, ms):
                return True
    except Exception as e:
        gLogger.exception(
            "Exception when StandardizeTimePeriod e = %s time = %s" %
            (e, str(tar)))
def normalizeData(self, df):
    """Normalise the raw columns of ``df`` in place and return it.

    * ``date`` and ``time`` are converted to integer strings; ``time`` is
      left-padded with zeros to nine characters.
    * ``vtSymbol`` is the part of the ``self.matFile`` file name before the
      first underscore; ``symbol`` is its alphabetic characters, lowercased.
    * ``datetime`` is built by ``self.convert2datetime`` from
      ``"<date> <time>"``.
    * ``date`` is finally overwritten with the part of the file name after
      the last underscore, minus its last four characters.

    :param df: DataFrame holding at least ``date`` and ``time`` columns.
    :return: the same DataFrame, or ``None`` when an exception was logged.
    """
    # ntpath.basename accepts both "\\" and "/" as separators on every
    # platform; the former split('\\') returned the whole path -- and thus a
    # wrong vtSymbol/date -- whenever matFile used forward slashes.
    import ntpath

    try:
        gLogger.info("start normalize df Data")
        df["date"] = df["date"].map(lambda x: str(int(x)))
        df["time"] = df["time"].map(lambda x: str(int(x)).zfill(9))

        fileName = ntpath.basename(self.matFile)
        nameParts = fileName.split('_')
        vtSymbol = nameParts[0]
        symbol = "".join(ch for ch in vtSymbol if ch.isalpha()).lower()
        # Strip the four trailing characters (the file extension).
        date = nameParts[-1][:-4]

        df["vtSymbol"] = vtSymbol
        df["symbol"] = symbol
        df["DT"] = df["date"] + ' ' + df["time"]
        df["datetime"] = df["DT"].map(self.convert2datetime)
        del df["DT"]
        df["date"] = date
        return df
    except Exception as e:
        gLogger.exception("Exception when convert to df: %s" % e)
def reserveLastTickInAuc(self):
    """Keep only the last tick inside each call-auction minute.

    A temporary ``structTime`` column is parsed from ``time``.  For every
    entry of ``self.AucTime`` the ticks within the following minute are
    ordered newest first, and all but the first of them are appended to
    ``self.removeList``.  The temporary column is deleted afterwards.
    """
    try:
        gLogger.info("start reserveLastTickInAuc")
        self.df["structTime"] = self.df["time"].map(
            lambda raw: datetime.datetime.strptime(raw, "%H%M%S%f"))
        oneMinute = datetime.timedelta(minutes=1)

        for st in self.AucTime:
            start = datetime.datetime.strptime(st, '%H:%M')
            end = start + oneMinute
            inAuction = (self.df["structTime"] >= start) & (
                self.df["structTime"] < end)
            newestFirst = self.df.loc[inAuction].sort_values(
                by=["structTime"], ascending=False)
            # Position 0 is the latest tick and survives.
            for label in newestFirst.index.values[1:]:
                self.removeList.append(label)
                gLogger.debug('remove index = %d' % label)

        del self.df["structTime"]
    except Exception as e:
        gLogger.exception("Exception : %s" % e)
def initCleanRegulation(self):
    """Run the full tick-cleaning pipeline on ``self.df`` and store the result.

    The symbol is read from the first row's ``vtSymbol``; codes containing
    ``IFC``/``IHC``/``ICC``/``TFC`` are shortened to their first two
    characters.  ``self.Symbol`` is set to the lowercase letters of that
    code, ``self.initList()`` is called, the cleaning steps run in sequence,
    flagged rows are dropped and the frame is inserted into ``WIND_TICK_DB``.

    An empty ``self.df`` is logged and skipped.  Exceptions are logged and
    swallowed.
    """
    gLogger.info("start initCleanRegulation")
    dbNew = self.db.get_db("localhost", 27017, 'WIND_TICK_DB')
    try:
        if self.df.empty:
            # Nothing to read a symbol from; the former label lookup raised
            # here before the emptiness check could take effect.
            gLogger.info("df data is empty!")
            return

        # Positional access: label 0 need not exist once rows were dropped.
        i = self.df["vtSymbol"].iloc[0]
        if "IFC" in i or "IHC" in i or "ICC" in i or "TFC" in i:
            i = i[:2]
        self.Symbol = "".join([a for a in i if a.isalpha()]).lower()
        self.initList()

        self.cleanIllegalTradingTime()
        self.cleanSameTimestamp()
        self.reserveLastTickInAuc()
        self.cleanNullVolTurn()
        self.cleanNullPriceIndicator()
        self.cleanNullOpenInter()
        self.recordExceptionalPrice()
        self.delItemsFromRemove()
        self.db.insert2db(dbNew, i, self.df)
    except Exception as e:
        gLogger.exception("Exception: %s" % e)
def genOtherKData(self, vtSymbol, cycle):
    """Build the bars of every cycle in ``cycle`` from the 1-minute bars.

    For each cycle, consecutive entries of ``self.splitDict[vtSymbol][c]``
    delimit a window that is published through ``self.start1`` /
    ``self.end1``.  ``self.selectItems`` is applied to every 1-minute bar
    and the non-``None`` results are aggregated with ``self.aggMethod`` when
    there are more than two of them.  Each cycle is written to
    ``WIND_<c>_MIN_DB``; a failure in one cycle is logged and the next cycle
    still runs.

    :param vtSymbol: key into ``self.barDict`` / ``self.splitDict``.
    :param cycle: iterable of cycle lengths.
    """
    for c in cycle:
        try:
            gLogger.info("start genOtherKData cycle = %d" % c)
            self.barDict[vtSymbol][c] = []
            edges = self.splitDict[vtSymbol][c]
            for left, right in zip(edges, edges[1:]):
                # selectItems reads the window from these attributes.
                self.start1 = time.strptime(str(left).strip(), '%H:%M:%S')
                self.end1 = time.strptime(str(right).strip(), '%H:%M:%S')
                picked = [
                    bar
                    for bar in map(self.selectItems,
                                   self.barDict[vtSymbol][1])
                    if bar is not None
                ]
                frame = pd.DataFrame(picked)
                if len(frame) > 2:
                    self.barDict[vtSymbol][c].append(self.aggMethod(frame))

            dbName = 'WIND_' + str(c) + '_MIN_DB'
            dbNew = self.db.get_db("localhost", 27017, dbName)
            self.db.insert2db(dbNew, vtSymbol, self.barDict[vtSymbol][c])
        except Exception as e:
            gLogger.exception("Exception : %s" % e)
def genTimeList(self, symbol, cycle):
    """Fill ``self.splitDict[symbol]`` with sorted bar boundaries per cycle.

    The ``CurrPeriod`` entry of ``self.dfInfo`` for ``symbol`` is a
    comma-separated list of ``HH:MM-HH:MM`` sessions.  For each session the
    start is adjusted -- moved one minute later when it is listed in
    ``self.AucTime``, otherwise advanced in 10-minute steps until its minute
    (with 0 counted as 60) is a multiple of the cycle -- and a
    ``pd.date_range`` from start to end at the cycle frequency supplies the
    boundary times.  Per cycle the times are de-duplicated and sorted.

    :param symbol: row label in ``self.dfInfo`` and key of ``splitDict``.
    :param cycle: iterable of cycle lengths in minutes.
    """
    try:
        collected = {}
        self.splitDict[symbol] = {}
        for c in cycle:
            gLogger.info("start genTimeList, cycle = %d" % c)
            collected[c] = []
            self.splitDict[symbol][c] = []

            periodSpec = self.dfInfo.loc[symbol]["CurrPeriod"]
            bounds = []
            for span in periodSpec.split(','):
                bounds.extend(span.split('-'))

            # bounds alternates start, end, start, end, ...
            for rawStart, rawEnd in zip(bounds[0::2], bounds[1::2]):
                start = str(rawStart).strip()
                end = str(rawEnd).strip()
                if start in self.AucTime:
                    moved = datetime.datetime.strptime(
                        start, "%H:%M") + datetime.timedelta(minutes=1)
                    start = moved.strftime("%H:%M")
                else:
                    while True:
                        minute = datetime.datetime.strptime(
                            start, "%H:%M").minute
                        # A full hour counts as minute 60.
                        if (minute or 60) % int(c) == 0:
                            break
                        moved = datetime.datetime.strptime(
                            start, "%H:%M") + datetime.timedelta(minutes=10)
                        start = moved.strftime("%H:%M")

                stamps = pd.date_range(start, end, freq=(str(c) + 'min'))
                collected[c].extend(stamps.time.tolist())

            self.splitDict[symbol][c] = sorted(set(collected[c]))
    except Exception as e:
        gLogger.exception("Exception : %s" % e)