def __init__(self, limit_close_pct=1, kPatters: [] = None):
    """Init the detail-collector with an optional whitelist of K-pattern values.

    limit_close_pct: passed through to Find_KPattern_skip1_predit2.
    kPatters: pattern values to keep; None means an empty whitelist.
    """
    Find_KPattern_skip1_predit2.__init__(self,
                                         limit_close_pct=limit_close_pct)
    # Fix: the original iterated `kPatters` unconditionally and crashed with
    # TypeError when the default None was used.
    if kPatters is None:
        kPatters = []
    self.kPatters = kPatters
    self.pct_split = [-7, -5, -3, -1.0, 0, 1, 3, 5, 7]
    self.pctEncoder = FloatEncoder(self.pct_split)
    # Membership map for O(1) whitelist checks.
    self.kPattersMap = {value: True for value in kPatters}
    self.allTradyDayCount = 0
    self.allTradeDay = 0
    self.occurDayMap = {}
def __findCenterPct(self, pct_list, min_pct, max_pct, best_pct,
                    best_probal) -> Union[float, float]:
    """Bisect [min_pct, max_pct] for the split value whose single-split encoder
    puts as close to 50% of `pct_list` in the lower bucket as possible.

    Returns the pair (best_pct, best_probal).
    NOTE(review): `Union[float, float]` collapses to `float`; the method in fact
    returns a 2-tuple — `Tuple[float, float]` was presumably intended.
    """
    # Terminate once the interval is narrower than 0.01.
    if max_pct - min_pct < 0.01:
        return best_pct, best_probal
    pct = (max_pct + min_pct) / 2
    encoder = FloatEncoder([pct])
    flaotRangeList = self.__computeRangeFloatList(pct_list, encoder, False)
    # Probability mass of the bucket below `pct` (list is unsorted, index 0).
    probal = flaotRangeList[0].probal
    if abs(probal - 0.5) < abs(best_probal - 0.5):
        best_pct = pct
        best_probal = probal
    if probal > 0.5:
        # More than half the values fall below: pct is too large — go lower.
        pct2, probal2 = self.__findCenterPct(pct_list, min_pct, pct, best_pct,
                                             best_probal)
    else:
        pct2, probal2 = self.__findCenterPct(pct_list, pct, max_pct, best_pct,
                                             best_probal)
    if abs(probal2 - 0.5) < abs(best_probal - 0.5):
        best_pct = pct2
        best_probal = probal2
    return best_pct, best_probal
def __init__(self):
    """Set up empty rolling windows and helpers used while collecting bars."""
    # Rolling windows, newest-last; filled with None until enough bars arrive.
    self.lasted15Bar = np.array([None] * 15)
    self.lasted3BarKdj = np.array([None] * 3)
    self.lasted3BarMacd = np.array([None] * 3)
    self.lasted3BarArron = np.array([None] * 2)
    # Bucket boundaries for KDJ values.
    self.kdjEncoder = FloatEncoder([15, 30, 45, 60, 75, 90])
    # Per-date count of collected samples.
    self.mDateOccurCountMap = {}
    self.sw = SWImpl()
class PredictModel:
    """Abstract interface of a per-dimension prediction model."""

    # Two pct-bucket encoders offset by half a bucket; using both gives callers
    # finer effective resolution over the [-10, 10] pct range.
    PctEncoder1 = FloatEncoder([-7, -5, -3, -2, -1, 0, 1, 2, 3, 5, 7],
                               minValue=-10,
                               maxValue=10)
    PctEncoder2 = FloatEncoder(
        [-7.5, -5.5, -3.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5, 5.5, 7.5],
        minValue=-10,
        maxValue=10)

    @abstractmethod
    def predict(
        self, data: Union[CollectData, Sequence['CollectData']]
    ) -> Union[PredictData, Sequence['PredictData']]:
        """Predict a single CollectData or a sequence; result matches the input shape."""
        # 1. load
        pass

    """
    Returns whether the prediction matched: isSellOk, isBuyOk
    """

    @abstractmethod
    def predictResult(self, data: PredictData) -> Union[bool, bool]:
        pass

    @abstractmethod
    def buildQuantData(self):
        pass

    @abstractmethod
    def buildPredictModel(self, split_rate=0.7, useSVM=True):
        pass

    """
    Self test; returns sell_core, buy_score
    """

    @abstractmethod
    def selfTest(self) -> Tuple[float, float]:
        pass
def __computeRangeFloatList(self,
                            pct_list: [],
                            encoder: FloatEncoder,
                            sort=True) -> Sequence['FloatRange']:
    """Histogram `pct_list` into the encoder's buckets as FloatRange entries.

    Each entry carries the bucket code and its probability mass; with
    sort=True the list is returned in FloatRange.sort order.
    """
    total = len(pct_list)
    # One counter per bucket code, in code order.
    buckets = {code: 0 for code in range(encoder.mask())}
    for value in pct_list:
        buckets[encoder.encode(value)] += 1
    rangeList = [
        FloatRange(encode=code,
                   probal=(count / total if total > 0 else 0.0))
        for code, count in buckets.items()
    ]
    return FloatRange.sort(rangeList) if sort else rangeList
def __findBestFloatEncoder(
    self, pct_list: [],
    originEncoder: FloatEncoder
) -> Union[FloatEncoder, Sequence['FloatRange']]:
    """Scan shifted copies of `originEncoder`, keeping the shift that maximizes
    the probability of the top bucket over `pct_list`.

    Returns (bestEncoder, bestRangeList); bestRangeList is the sorted
    FloatRange distribution produced by the winning encoder.
    """
    SCALE = 5000
    # Shift range is the width of the middle bucket, in 1/SCALE units.
    # (Renamed from `min`/`max`, which shadowed the builtins.)
    lower, upper = originEncoder.parseEncode(int(originEncoder.mask() / 2))
    lower = int(lower * SCALE)
    upper = int(upper * SCALE)
    step = int((upper - lower) / 100)
    if step <= 0:
        # Guard: range() raises ValueError on step 0 when the middle bucket
        # is narrower than 100/SCALE.
        step = 1
    bestProbal = 0
    bestEncoder = originEncoder
    bestRangeList = None
    for shift in range(lower, upper, step):
        delta = shift / SCALE
        encoder = originEncoder.shift(delta)
        floatRangeList = self.__computeRangeFloatList(pct_list, encoder)
        probal = floatRangeList[0].probal  # sorted: index 0 is the top bucket
        if probal > bestProbal:
            bestProbal = probal
            bestEncoder = encoder
            bestRangeList = floatRangeList
    return bestEncoder, bestRangeList
def getBuyFloatEncoder(self) -> FloatEncoder:
    """Return a FloatEncoder built over this object's buy-side split points."""
    splits = self.buySplits
    return FloatEncoder(splits)
def build(self,
          soruce: BarDataSource,
          model: CoreEngineModel,
          split_rate=0.7,
          limit_dimen_size=-1,
          min_size=300,
          onlyDimens: [] = None,
          build_quant_data_only=False):
    """Collect bar data per dimension, persist it, then build quant data and
    (optionally) the prediction models.

    soruce: iterator over (bars, code) pairs; consumed until it yields None.
    model: the CoreEngineModel used to collect and label bars.
    split_rate: train/test split passed to model building.
    limit_dimen_size: if > 0, cap on how many dimensions are kept.
    min_size: dimensions with fewer collected samples are dropped.
    onlyDimens: optional dimension filter forwarded to collectBars.
    build_quant_data_only: skip model building when True.
    """
    self.logger = LogUtil.create_Filelogger(f"{self.__file_dir}/build.log",
                                            "build")
    self.printLog("\n\nbuild() start(只适用于日K线)...", True)
    self.__model = model
    # collector.onCreate()
    bars, code = soruce.nextBars()
    dataSet = {}
    totalCount = 0
    dateOccurCount: {} = {}  # per trading day: count of collected samples
    while not bars is None:
        # Skip the first 40 bars (indicator warm-up); just register the dates.
        for i in range(40, len(bars)):
            occurtDate: datetime = bars[i].datetime
            # Normalize to midnight so one key represents one trading day.
            occurtDate = datetime(year=occurtDate.year,
                                  month=occurtDate.month,
                                  day=occurtDate.day)
            occurCount = dateOccurCount.get(occurtDate)
            if occurCount is None:
                dateOccurCount[occurtDate] = 0
        finishedList, validDataList = model.collectBars(
            bars, code, onlyDimens)
        self.printLog(
            f"collect code:{code}, finished:{len(finishedList)},stop:{len(validDataList)}"
        )
        totalCount += len(finishedList)
        bars, code = soruce.nextBars()
        for data in finishedList:
            # File each finished sample under its dimension.
            listData: [] = dataSet.get(data.dimen)
            if listData is None:
                listData = []
                dataSet[data.dimen] = listData
            listData.append(data)
    dimes = dataSet.keys()
    self.printLog(f"总共收集到{totalCount}数据,维度个数:{len(dimes)}", True)
    fitlerDataSet = {}
    __the_count = 0
    dataCountList = []
    for dimen, listData in dataSet.items():
        if limit_dimen_size > 0 and limit_dimen_size <= __the_count:
            # Cap on the number of kept dimensions reached.
            break
        size = len(listData)
        dataCountList.append(size)
        if size >= min_size:
            __the_count += 1
            fitlerDataSet[dimen] = listData
            for data in listData:
                # Tally occurrences per trading day (only for kept dimensions).
                occurtDate: datetime = data.occurBars[-1].datetime
                occurtDate = datetime(year=occurtDate.year,
                                      month=occurtDate.month,
                                      day=occurtDate.day)
                occurCount = dateOccurCount.get(occurtDate)
                if not occurCount is None:
                    dateOccurCount[occurtDate] = occurCount + 1
            filePath = self.__getCollectFilePath(dimen)
            # Persist the collected samples for this dimension.
            with open(filePath, 'wb+') as fp:
                pickle.dump(listData, fp, -1)
    dimenCountEncoder = FloatEncoder(
        [10, 50, 100, 150, 200, 500, 1000, 2000, 5000])
    dimenCountRangeList = dimenCountEncoder.computeValueDisbustion(
        dataCountList)
    # Log the distribution of sample counts across dimensions.
    self.printLog(
        f"各个维度数量分布情况(将过滤数量少于{min_size}个的维度):\n {FloatRange.toStr(dimenCountRangeList,dimenCountEncoder)}"
    )
    occurDateCountEncoder = FloatEncoder([1, 3, 8, 15, 30])
    occurDateCountRangeList = occurDateCountEncoder.computeValueDisbustion(
        list(dateOccurCount.values()))
    # Log the per-trading-day collected-sample distribution.
    self.printLog(
        f"每个交易日产生收集数据个数的分布情况:\n {FloatRange.toStr(occurDateCountRangeList, occurDateCountEncoder)}"
    )
    dataSet = fitlerDataSet
    self.__saveDimeenAndQuantData(dataSet)
    shutil.rmtree(self.__getModelDirPath())  # recursively delete the model dir
    if build_quant_data_only == False:
        self.__buildAndSaveModelData(split_rate, True)
    self.load(model)
    self.__ouptBuildDataToFiles()
    self.logger = None
class CoreEngineImpl(CoreEngine):
    """File-backed CoreEngine: collects bar data per dimension, quantizes it,
    builds per-dimension classifier models, and serves queries over them.

    All state lives under `dirPath`: collected samples, model files, and the
    pickled dimension/quant/ability maps.
    """

    COLLECT_DATA_FILE_NAME = "colllect"
    # Pct-change bucket boundaries used to quantize the data.
    quantFloatEncoder = FloatEncoder([-7, -4.5, -3, -1.5, 0, 1.5, 3, 4.5, 7])

    def __init__(self, dirPath: str):
        """Create the engine rooted at dirPath, making its directories as needed."""
        self.mAllDimension: ['Dimension'] = None
        self.mQuantDataMap: {} = None
        self.mAbilityMap: {} = None
        self.__file_dir = dirPath
        self.__model = None
        self.logger = None
        if not os.path.exists(dirPath):
            os.makedirs(dirPath)
        collectDir = self.__getCollectDirPath()
        if not os.path.exists(collectDir):
            os.makedirs(collectDir)
        modelDir = self.__getModelDirPath()
        if not os.path.exists(modelDir):
            os.makedirs(modelDir)

    def printLog(self, info: str, forcePrint=False):
        """Log to the current file logger, or stdout when none is attached."""
        if self.logger is None:
            print(f"[CoreEngineImpl]: {info}")
        else:
            self.logger.info(f"{info}")

    # --- path helpers -------------------------------------------------------

    def __getDimenisonFilePath(self):
        return f"{self.__file_dir}/dimension.bin"

    def __getCollectDirPath(self):
        return f"{self.__file_dir}/colllect"

    def __getModelDirPath(self):
        return f"{self.__file_dir}/model"

    def __getQuantFilePath(self):
        return f"{self.__file_dir}/quantData.bin"

    def __getAbilityFilePath(self):
        return f"{self.__file_dir}/abilityData.bin"

    def __getCollectFilePath(self, dimen: Dimension):
        dirPath = f"{self.__getCollectDirPath()}/{dimen.getKey()}"
        return dirPath

    def __getModelFilePath(self, dimen: Dimension):
        dirPath = f"{self.__getModelDirPath()}/{dimen.getKey()}"
        return dirPath

    def load(self, model: CoreEngineModel):
        """Load dimension/quant/ability data from disk; ability data is optional
        and its presence flips __modelLoaded."""
        if self.logger is None:
            self.logger = LogUtil.create_Filelogger(
                f"{self.__file_dir}/load.log", "load")
        self.__model = model
        self.printLog("load() start...", True)
        with open(self.__getDimenisonFilePath(), 'rb') as fp:
            self.mAllDimension = pickle.load(fp)
        with open(self.__getQuantFilePath(), 'rb') as fp:
            self.mQuantDataMap = pickle.load(fp)
        self.__modelLoaded = False
        if os.path.exists(self.__getAbilityFilePath()):
            self.__modelLoaded = True
            with open(self.__getAbilityFilePath(), 'rb') as fp:
                self.mAbilityMap = pickle.load(fp)
        self.printLog(f"load() finished,总共加载{len(self.mAllDimension)}个维度数据",
                      True)
        assert len(self.mQuantDataMap) == len(self.mAllDimension)
        self.logger = LogUtil.create_Filelogger(f"{self.__file_dir}/run.log",
                                                "run")

    def build(self,
              soruce: BarDataSource,
              model: CoreEngineModel,
              split_rate=0.7,
              limit_dimen_size=-1,
              min_size=300,
              onlyDimens: [] = None,
              build_quant_data_only=False):
        """Collect bar data per dimension, persist it, then build quant data
        and (unless build_quant_data_only) the prediction models."""
        self.logger = LogUtil.create_Filelogger(f"{self.__file_dir}/build.log",
                                                "build")
        self.printLog("\n\nbuild() start(只适用于日K线)...", True)
        self.__model = model
        # collector.onCreate()
        bars, code = soruce.nextBars()
        dataSet = {}
        totalCount = 0
        dateOccurCount: {} = {}  # per trading day: count of collected samples
        while not bars is None:
            # Skip the first 40 bars (indicator warm-up); just register dates.
            for i in range(40, len(bars)):
                occurtDate: datetime = bars[i].datetime
                occurtDate = datetime(year=occurtDate.year,
                                      month=occurtDate.month,
                                      day=occurtDate.day)
                occurCount = dateOccurCount.get(occurtDate)
                if occurCount is None:
                    dateOccurCount[occurtDate] = 0
            finishedList, validDataList = model.collectBars(
                bars, code, onlyDimens)
            self.printLog(
                f"collect code:{code}, finished:{len(finishedList)},stop:{len(validDataList)}"
            )
            totalCount += len(finishedList)
            bars, code = soruce.nextBars()
            for data in finishedList:
                # File each finished sample under its dimension.
                listData: [] = dataSet.get(data.dimen)
                if listData is None:
                    listData = []
                    dataSet[data.dimen] = listData
                listData.append(data)
        dimes = dataSet.keys()
        self.printLog(f"总共收集到{totalCount}数据,维度个数:{len(dimes)}", True)
        fitlerDataSet = {}
        __the_count = 0
        dataCountList = []
        for dimen, listData in dataSet.items():
            if limit_dimen_size > 0 and limit_dimen_size <= __the_count:
                # Cap on the number of kept dimensions reached.
                break
            size = len(listData)
            dataCountList.append(size)
            if size >= min_size:
                __the_count += 1
                fitlerDataSet[dimen] = listData
                for data in listData:
                    # Tally occurrences per trading day.
                    occurtDate: datetime = data.occurBars[-1].datetime
                    occurtDate = datetime(year=occurtDate.year,
                                          month=occurtDate.month,
                                          day=occurtDate.day)
                    occurCount = dateOccurCount.get(occurtDate)
                    if not occurCount is None:
                        dateOccurCount[occurtDate] = occurCount + 1
                filePath = self.__getCollectFilePath(dimen)
                # Persist the collected samples for this dimension.
                with open(filePath, 'wb+') as fp:
                    pickle.dump(listData, fp, -1)
        dimenCountEncoder = FloatEncoder(
            [10, 50, 100, 150, 200, 500, 1000, 2000, 5000])
        dimenCountRangeList = dimenCountEncoder.computeValueDisbustion(
            dataCountList)
        # Log the distribution of sample counts across dimensions.
        self.printLog(
            f"各个维度数量分布情况(将过滤数量少于{min_size}个的维度):\n {FloatRange.toStr(dimenCountRangeList,dimenCountEncoder)}"
        )
        occurDateCountEncoder = FloatEncoder([1, 3, 8, 15, 30])
        occurDateCountRangeList = occurDateCountEncoder.computeValueDisbustion(
            list(dateOccurCount.values()))
        # Log the per-trading-day collected-sample distribution.
        self.printLog(
            f"每个交易日产生收集数据个数的分布情况:\n {FloatRange.toStr(occurDateCountRangeList, occurDateCountEncoder)}"
        )
        dataSet = fitlerDataSet
        self.__saveDimeenAndQuantData(dataSet)
        shutil.rmtree(self.__getModelDirPath())  # recursively delete model dir
        if build_quant_data_only == False:
            self.__buildAndSaveModelData(split_rate, True)
        self.load(model)
        self.__ouptBuildDataToFiles()
        self.logger = None

    def buildPredictModel(self, split_rate=0.7, useSVM=True):
        """Rebuild prediction models from already-collected data."""
        self.__buildAndSaveModelData(split_rate, useSVM)
        self.__ouptBuildDataToFiles()

    def buildQuantData(self):
        """Recompute quant data from the collected data already on disk."""
        dataSet = {}
        for dimen in self.mAllDimension:
            cDatas = self.loadCollectData(dimen)
            assert len(cDatas) > 0
            dataSet[dimen] = cDatas
        self.__saveDimeenAndQuantData(dataSet)
        self.__ouptBuildDataToFiles()

    def __ouptBuildDataToFiles(self):
        """Dump quant (and, when loaded, ability) data to build.xlsx."""
        _outputfileName = f"{self.__file_dir}/build.xlsx"
        writer = pd.ExcelWriter(_outputfileName)

        def quant_data_to_list(dimen: Dimension, q: QuantData) -> []:
            return [
                dimen.value, q.count, q.sellCenterPct, q.buyCenterPct,
                q.getPowerRate()
            ]

        _quant_columns = [
            'dimen', "count", "sCenterPct", "bCenterPct", "power"
        ]
        _quant_values = []
        for dimen in self.mAllDimension:
            _quantData = self.queryQuantData(dimen)
            _quant_values.append(quant_data_to_list(dimen, _quantData))
        pd.DataFrame(_quant_values, columns=_quant_columns) \
            .to_excel(writer, sheet_name="quantData")
        if self.__modelLoaded:

            def ability_data_to_list(dimen: Dimension,
                                     q: PredictAbilityData) -> []:
                return [
                    dimen.value, q.getStabilitySell(), q.getStabilityBuy(),
                    q.trainData.count, q.trainData.scoreSell,
                    q.trainData.scoreBuy, q.trainData.biasSellWin,
                    q.trainData.biasBuyWin, q.trainData.biasSellLoss,
                    q.trainData.biasBuyLoss, q.testData.count,
                    q.testData.scoreSell, q.testData.scoreBuy,
                    q.testData.biasSellWin, q.testData.biasBuyWin,
                    q.testData.biasSellLoss, q.testData.biasBuyLoss
                ]

            _ability_columns = [
                'dimen', 's稳定性', 'b稳定性', "count|训", "sScore|训", "bScore|训",
                "s正方差|训", "b正方差|训", "s负方差|训", "b负方差|训", "count|测",
                "sScore|测", "bScore|测", "s正方差|测", "b正方差|测", "s负方差|测",
                "b负方差|测"
            ]
            _ability_values = []
            for dimen in self.mAllDimension:
                _abilityData = self.queryPredictAbilityData(dimen)
                _ability_values.append(
                    ability_data_to_list(dimen, _abilityData))
            pd.DataFrame(_ability_values, columns=_ability_columns) \
                .to_excel(writer, sheet_name="abilityData")
        # NOTE(review): ExcelWriter.save() is deprecated in newer pandas in
        # favor of close() — confirm the pinned pandas version.
        writer.save()
        writer.close()

    def __saveDimeenAndQuantData(self, dataSet: {}):
        """Compute QuantData per dimension and pickle both maps to disk."""
        self.printLog(f"开始保存量化数据", True)
        saveDimens = []
        saveCollectCount = 0
        maxSize = 0
        minSize = 9999999999
        quantMap = {}
        for dimen, listData in dataSet.items():
            size = len(listData)
            quantData = self.computeQuantData(listData)
            self.__printQuantData(dimen, quantData)
            quantData.check()
            quantMap[dimen] = quantData
            maxSize = max(maxSize, size)
            minSize = min(minSize, size)
            saveDimens.append(dimen)
            saveCollectCount += size
        with open(self.__getDimenisonFilePath(), 'wb+') as fp:
            pickle.dump(saveDimens, fp, -1)
        with open(self.__getQuantFilePath(), 'wb+') as fp:
            pickle.dump(quantMap, fp, -1)
        self.mAllDimension = saveDimens
        self.mQuantDataMap = quantMap
        self.printLog(
            f"build() finished, 总共保存{len(saveDimens)}/{len(dataSet)}个维度数据,共{saveCollectCount}个数据,其中最多{maxSize},最小{minSize}",
            True)

    def __buildAndSaveModelData(self, split_rate: float, useSVM: bool):
        """For each dimension: time-order the data, split train/test, measure
        ability, then train and save the final model on all the data."""
        oldLogger = self.logger
        self.logger = LogUtil.create_Filelogger(
            f"{self.__file_dir}/buildModel.log", "buidModel")

        # Comparator: chronological order by last occur-bar datetime.
        def cmp_collectdata_time(c1: CollectData, c2: CollectData):
            d1 = c1.occurBars[-1].datetime
            d2 = c2.occurBars[-1].datetime
            if d1 < d2:
                return -1
            if d1 == d2:
                return 0
            return 1

        dimen_list: Sequence['Dimension'] = []
        with open(self.__getDimenisonFilePath(), 'rb') as fp:
            dimen_list = pickle.load(fp)
        abilityDataMap = {}
        count = len(dimen_list)
        run_count = 0
        for dimen in dimen_list:
            run_count += 1
            self.printLog(
                f"正在计算并保存模型数据:dime={dimen},progress={run_count}/{count}",
                True)
            dataList = self.loadCollectData(dimen)
            size = len(dataList)
            trainSize = int(size * split_rate)
            trainDataList: Sequence['CollectData'] = []
            testDataList: Sequence['CollectData'] = []
            dataList = sorted(dataList,
                              key=cmp_to_key(cmp_collectdata_time),
                              reverse=False)
            split_date = None
            for i in range(0, size):
                data = dataList[i]
                if i < trainSize:
                    trainDataList.append(data)
                elif i == trainSize:
                    testDataList.append(data)
                    split_date = data.occurBars[-1].datetime
                else:
                    testDataList.append(data)
                    # Ensure the split really is chronological.
                    assert data.occurBars[-1].datetime >= split_date
            ablityData = self.__buildModelAbility(dimen, trainDataList,
                                                  testDataList, useSVM)
            abilityDataMap[dimen] = ablityData
            # Train and save the final model on the full data set.
            model = ClassifierModel(self, dimen, useSVM)
            model.build(self, dataList, self.mQuantDataMap[dimen])
            model.save(self.__getModelFilePath(dimen))
        # Save ability data.
        with open(self.__getAbilityFilePath(), 'wb+') as fp:
            pickle.dump(abilityDataMap, fp, -1)
        # Log model performance metrics.
        pctEncoder = self.getEngineModel().getPctEncoder1()
        p_sell_pct_total = 0.0
        p_buy_pct_total = 0.0
        sell_stability_total = 0.0
        buy_stablility_total = 0.0
        sell_score_total = 0.0
        buy_score_total = 0.0
        for dimen, abilityData in abilityDataMap.items():
            s_min, s_max = pctEncoder.parseEncode(
                abilityData.sellPctRnageList[0].encode)
            p_sell_pct_total += (
                s_min +
                (s_max - s_min) * abilityData.sellPctRnageList[0].probal)
            s_min, s_max = pctEncoder.parseEncode(
                abilityData.buyPctRnageList[0].encode)
            p_buy_pct_total += (
                s_min +
                (s_max - s_min) * abilityData.buyPctRnageList[0].probal)
            sell_stability_total += abilityData.getStabilitySell()
            buy_stablility_total += abilityData.getStabilityBuy()
            sell_score_total += abilityData.getScoreSell()
            buy_score_total += abilityData.getScoreBuy()
            # NOTE(review): both sides of "正方差|负方差" call getBiasSell(True)
            # (and getBiasBuy(True)); the second likely should pass False.
            self.printLog(
                f"dimen:{dimen.value},count = {abilityData.getCount()}"
                f",s_得分={abilityData.getScoreSell()}"
                f",b_得分={abilityData.getScoreBuy()}"
                f",s_稳定性={abilityData.getStabilitySell()}"
                f",b_稳定性={abilityData.getStabilityBuy()}"
                f",sell:正方差|负方差={abilityData.getBiasSell(True)}|{abilityData.getBiasSell(True)}"
                f",buy:正方差|负方差={abilityData.getBiasBuy(True)}|{abilityData.getBiasBuy(True)}"
            )
            self.printLog(
                f" 预测SellPct值分布情况:{FloatRange.toStr(abilityData.sellPctRnageList,pctEncoder)}"
            )
            self.printLog(
                f" 预测Buy_Pct值分布情况:{FloatRange.toStr(abilityData.buyPctRnageList,pctEncoder)}"
            )
        abilitySize = len(abilityDataMap)
        self.printLog(
            f"【总体】: s_pct能力:%.2f,b_pct能力:%.2f,s得分:%.2f,b得分:%.2f,s稳定性:%.2f,b稳定性%.2f"
            % (p_sell_pct_total / abilitySize, p_buy_pct_total / abilitySize,
               sell_score_total / abilitySize, buy_score_total / abilitySize,
               sell_stability_total / abilitySize,
               buy_stablility_total / abilitySize))
        self.__modelLoaded = True
        self.printLog(f"创建模型完成", True)
        self.logger = oldLogger

    def __buildModelAbility(self, dimen: Dimension,
                            trainDataList: Sequence['CollectData'],
                            testDataList: Sequence['CollectData'],
                            useSVM: bool):
        """Train on trainDataList and score both train and test sets,
        returning a filled PredictAbilityData."""
        self.printLog("buildAbilityData:", True)
        trainQauntData = self.computeQuantData(trainDataList)
        model = ClassifierModel(self, dimen, useSVM)
        model.build(self, trainDataList, trainQauntData)
        _sell_pct_value_list = []
        _buy_pct_value_list = []
        _trainData = model.testScore(trainDataList, _sell_pct_value_list,
                                     _buy_pct_value_list)  # train-set score
        _testData = model.testScore(testDataList, _sell_pct_value_list,
                                    _buy_pct_value_list)  # test-set score
        abilityData = PredictAbilityData()
        abilityData.trainData = _trainData
        abilityData.testData = _testData
        pctEncoder = self.getEngineModel().getPctEncoder1()
        abilityData.sellPctRnageList = pctEncoder.computeValueDisbustion(
            _sell_pct_value_list)
        abilityData.buyPctRnageList = pctEncoder.computeValueDisbustion(
            _buy_pct_value_list)
        abilityData.sellPctRnageList = FloatRange.sort(
            abilityData.sellPctRnageList)
        abilityData.buyPctRnageList = FloatRange.sort(
            abilityData.buyPctRnageList)
        return abilityData

    def loadCollectData(self, dimen: Dimension) -> Sequence['CollectData']:
        """Unpickle the collected samples for one dimension."""
        filePath = self.__getCollectFilePath(dimen)
        collectData = None
        with open(filePath, 'rb') as fp:
            collectData = pickle.load(fp)
        return collectData

    def getEngineModel(self) -> CoreEngineModel:
        return self.__model

    def computeQuantData(self,
                         dataList: Sequence['CollectData']) -> QuantData:
        """Compute QuantData for dataList using the class-level encoder."""
        return self.__computeQuantData(CoreEngineImpl.quantFloatEncoder,
                                       CoreEngineImpl.quantFloatEncoder,
                                       dataList)

    """
    Find the best encoder partition for the QuantData.
    """

    def __findCenterPct(self, pct_list, min_pct, max_pct, best_pct,
                        best_probal) -> Union[float, float]:
        """Bisect for the split value that puts ~50% of pct_list below it;
        returns (best_pct, best_probal)."""
        if max_pct - min_pct < 0.01:
            return best_pct, best_probal
        pct = (max_pct + min_pct) / 2
        encoder = FloatEncoder([pct])
        flaotRangeList = self.__computeRangeFloatList(pct_list, encoder,
                                                      False)
        probal = flaotRangeList[0].probal
        if abs(probal - 0.5) < abs(best_probal - 0.5):
            best_pct = pct
            best_probal = probal
        if probal > 0.5:
            # pct is too large — search the lower half.
            pct2, probal2 = self.__findCenterPct(pct_list, min_pct, pct,
                                                 best_pct, best_probal)
        else:
            pct2, probal2 = self.__findCenterPct(pct_list, pct, max_pct,
                                                 best_pct, best_probal)
        if abs(probal2 - 0.5) < abs(best_probal - 0.5):
            best_pct = pct2
            best_probal = probal2
        return best_pct, best_probal

    """
    Find the best encoder partition for the QuantData.
    """

    def __findBestFloatEncoder(
        self, pct_list: [],
        originEncoder: FloatEncoder
    ) -> Union[FloatEncoder, Sequence['FloatRange']]:
        """Scan shifted encoders, keeping the one maximizing the top bucket's
        probability; returns (bestEncoder, bestRangeList)."""
        SCALE = 5000
        # NOTE(review): these locals shadow the builtins min/max.
        min, max = originEncoder.parseEncode(int(originEncoder.mask() / 2))
        min = int(min * SCALE)
        max = int(max * SCALE)
        step = int((max - min) / 100)
        bestProbal = 0
        bestEncoder = originEncoder
        bestRnageList = None
        for shift in range(min, max, step):
            d = shift / SCALE
            encoder = originEncoder.shift(d)
            flaotRangeList = self.__computeRangeFloatList(pct_list, encoder)
            probal = flaotRangeList[0].probal
            if probal > bestProbal:
                bestProbal = probal
                bestEncoder = encoder
                bestRnageList = flaotRangeList
        return bestEncoder, bestRnageList

    def __computeRangeFloatList(self,
                                pct_list: [],
                                encoder: FloatEncoder,
                                sort=True) -> Sequence['FloatRange']:
        """Histogram pct_list into encoder buckets as FloatRange entries."""
        rangeCount = {}
        totalCount = len(pct_list)
        for i in range(0, encoder.mask()):
            rangeCount[i] = 0
        for pct in pct_list:
            encode = encoder.encode(pct)
            rangeCount[encode] += 1
        rangeList = []
        for encode, count in rangeCount.items():
            probal = 0.0
            if totalCount > 0:
                probal = count / totalCount
            floatRange = FloatRange(encode=encode, probal=probal)
            rangeList.append(floatRange)
        if sort:
            return FloatRange.sort(rangeList)
        return rangeList

    def __getSellBuyPctLabel(self, cData: CollectData):
        """Return the (sell_pct, buy_pct) labels relative to the base price."""
        sellPrice, buyPrice = self.getEngineModel().getYLabelPrice(cData)
        basePrice = self.getEngineModel().getYBasePrice(cData)
        __sell_pct = 100 * (sellPrice - basePrice) / basePrice
        __buy_pct = 100 * (buyPrice - basePrice) / basePrice
        return __sell_pct, __buy_pct

    def __computeQuantData(self, sellEncoder: FloatEncoder,
                           buyEncoder: FloatEncoder,
                           dataList: Sequence['CollectData']):
        """Build QuantData: best-shifted encoders, their distributions, and
        the ~50% center pct for both sell and buy sides."""
        sell_pct_list = []
        buy_pct_list = []
        totalCount = len(dataList)
        for data in dataList:
            bars: ['BarData'] = data.predictBars
            assert len(bars) > 0
            sell_pct, buy_pct = self.getEngineModel().getYLabelPct(data)
            sell_pct_list.append(sell_pct)
            buy_pct_list.append(buy_pct)
        sellEncoder, sellRangeFloat = self.__findBestFloatEncoder(
            sell_pct_list, sellEncoder)
        buyEncoder, buyRangeFloat = self.__findBestFloatEncoder(
            buy_pct_list, buyEncoder)
        sell_center_pct, best_probal1 = self.__findCenterPct(
            sell_pct_list, sellEncoder.splits[0], sellEncoder.splits[-1], 0.0,
            0.0)
        buy_center_pct, best_probal2 = self.__findCenterPct(
            buy_pct_list, buyEncoder.splits[0], buyEncoder.splits[-1], 0.0,
            0.0)
        return QuantData(count=totalCount,
                         sellRange=sellRangeFloat,
                         buyRange=buyRangeFloat,
                         sellCenterPct=sell_center_pct,
                         buyCenterPct=buy_center_pct,
                         sellSplits=sellEncoder.splits,
                         buySplits=buyEncoder.splits)

    def collect(
        self, bars: ['BarData']
    ) -> Tuple[Sequence['CollectData'], Sequence['CollectData']]:
        """Run the engine model's collector over one bar series."""
        model = self.__model
        #collector.onCreate()
        code = bars[0].symbol
        finished, stop = model.collectBars(bars, code)
        return finished, stop

    def loadAllDimesion(self) -> Sequence['Dimension']:
        return self.mAllDimension

    def queryQuantData(self, dimen: Dimension) -> QuantData:
        return self.mQuantDataMap.get(dimen)

    def queryPredictAbilityData(self, dimen: Dimension) -> PredictAbilityData:
        return self.mAbilityMap.get(dimen)

    def isSupport(self, dimen: Dimension) -> bool:
        """A dimension is supported iff it has quant data."""
        return not self.queryQuantData(dimen) is None

    def loadPredictModel(self, dimen: Dimension) -> PredictModel:
        """Load the saved classifier for dimen, or None when unavailable."""
        if self.__modelLoaded and self.isSupport(dimen):
            model = ClassifierModel(self, dimen)
            model.load(self.__getModelFilePath(dimen))
            return model
        return None

    def printTopDimension(self, pow_rate_limit=1.0):
        """Print long-side and short-side top dimensions by power rate.

        NOTE(review): `dimens` and `engine` are not defined in this scope —
        they look like they should be self.mAllDimension and self; as written
        this method raises NameError. Confirm before use.
        """

        def com_quant(q1, q2):
            return q1.getPowerRate() - q2.getPowerRate()

        print(f"做多Top列表")
        dimeValues = []
        quant_list = []
        for dimen in dimens:
            quant = engine.queryQuantData(dimen)
            if quant.getPowerRate() >= pow_rate_limit:
                quant_list.append(quant)
                dimeValues.append(dimen.value)
        quant_list = sorted(quant_list,
                            key=cmp_to_key(com_quant),
                            reverse=True)
        for i in range(0, len(quant_list)):
            quant = quant_list[i]
            encoder = quant.getSellFloatEncoder()
            _min, _max = encoder.parseEncode(quant.sellRange[0].encode)
            print(
                f"[dime:{dimeValues[i]}]: count={quant.count},pow_rate=%.3f, probal=%.2f%%,centerPct=%.2f,sell:[{_min},{_max}]"
                % (quant.getPowerRate(), quant.getPowerProbal(True),
                   quant.sellCenterPct))
        print(f"top dimeValues: {dimeValues}")
        print(f"做空Top列表")
        dimeValues = []
        quant_list = []
        for dimen in dimens:
            quant = engine.queryQuantData(dimen)
            if quant.getPowerRate() <= -pow_rate_limit:
                quant_list.append(quant)
                dimeValues.append(dimen.value)
        quant_list = sorted(quant_list,
                            key=cmp_to_key(com_quant),
                            reverse=False)
        for i in range(0, len(quant_list)):
            quant = quant_list[i]
            encoder = quant.getBuyFloatEncoder()
            _min, _max = encoder.parseEncode(quant.buyRange[0].encode)
            print(
                f"[dime:{dimeValues[i]}]: count={quant.count},pow_rate=%.3f, probal=%.2f%%,centerPct=%.2f,buy:[{_min},{_max}]"
                % (quant.getPowerRate(), quant.getPowerProbal(False),
                   quant.buyCenterPct))
        print(f"top dimeValues: {dimeValues}")

    def __printQuantData(self, dimen, quant: QuantData):
        """Log one dimension's QuantData summary and its sell/buy ranges."""
        # print(
        #     f", probal=%.2f%%,centerPct=%.2f,sell:[{_min},{_max}]" % (
        #     quant.getPowerRate(), quant.getPowerProbal(True), quant.sellCenterPct))
        self.printLog(
            f"[dime:{dimen.value}]: count={quant.count},pow_rate=%.3f,sCenterPct=%.2f,bCenterPct=%.2f"
            % (quant.getPowerRate(), quant.sellCenterPct, quant.buyCenterPct))
        self.printLog(
            f" sellRange:{FloatRange.toStr(quant.sellRange,quant.getSellFloatEncoder())}"
        )
        self.printLog(
            f" buyRange:{FloatRange.toStr(quant.buyRange,quant.getBuyFloatEncoder())}"
        )
        pass
def printKPatterMoreDetail(kPatters=[
    6, 3, 17, 81, 7, 5, 4, 82, 159, 16, 28, 83, 15, 84, 18, 27, 93, 104, 158,
    92, 160, 236, 157, 94, 85, 80, 14, 8, 161, 9, 29, 170, 26, 19, 38, 2, 79
]):
    """Print next-day sell/buy pct distributions for each whitelisted K-pattern.

    For every SW2 index, whenever yesterday's bar matched a pattern in
    `kPatters`, today's mid-high/mid-low moves versus yesterday's close are
    histogrammed per pattern, then printed.
    """
    from earnmi.data.SWImpl import SWImpl
    from vnpy.trader.constant import Exchange
    from vnpy.trader.constant import Interval
    sw = SWImpl()
    lists = sw.getSW2List()
    start = datetime(2014, 5, 1)
    end = datetime(2020, 8, 17)
    pct_split = [-0.5, 0.5]  # bucket boundaries for the printed distributions
    pctEncoder = FloatEncoder(pct_split)
    kPattersMap = {value: True for value in kPatters}

    class InnerData(object):
        """Per-pattern histogram holder."""

        def __init__(self, kValue: int):
            self.kValue = kValue
            # Fix: these were class attributes, so every pattern shared the
            # same two arrays; now each pattern gets its own histograms.
            self.sell_disbute = np.zeros(pctEncoder.mask())  # sell-side dist
            self.buy_disbute = np.zeros(pctEncoder.mask())  # buy-side dist

    dataSet = {}
    occurDayMap = {}
    allTrayDay = 1
    for code in lists:
        barList = sw.getSW2Daily(code, start, end)
        indicator = Indicator(40)
        preBar = None
        previousIsMatch = False
        previousPatternVaule = None
        allTrayDay = max(allTrayDay, len(barList))
        for bar in barList:
            indicator.update_bar(bar)
            patternValue = KPattern.encode1KAgo1(indicator)
            todayIsMatch = False
            if not patternValue is None:
                todayIsMatch = patternValue in kPattersMap
            if todayIsMatch:
                dayKey = bar.datetime.year * 13 * 35 + \
                    bar.datetime.month * 13 + bar.datetime.day
                occurDayMap[dayKey] = True
            if previousIsMatch:
                # Fix: the original looked up dataSet by the boolean
                # `previousIsMatch` (and stored the boolean in kValue), so the
                # lookup never found the entry stored under the pattern value.
                innerData: InnerData = dataSet.get(previousPatternVaule)
                if innerData is None:
                    innerData = InnerData(previousPatternVaule)
                    dataSet[previousPatternVaule] = innerData
                sell_pct = 100 * ((bar.high_price + bar.close_price) / 2 -
                                  preBar.close_price) / preBar.close_price
                buy_pct = 100 * ((bar.low_price + bar.close_price) / 2 -
                                 preBar.close_price) / preBar.close_price
                innerData.buy_disbute[pctEncoder.encode(buy_pct)] += 1
                innerData.sell_disbute[pctEncoder.encode(sell_pct)] += 1
            preBar = bar
            previousIsMatch = todayIsMatch
            previousPatternVaule = patternValue
    print(f"所有交易日中,有意义的k线形态出现占比:%.2f%%" %
          (100 * len(occurDayMap) / allTrayDay))
    for kValue, dataItem in dataSet.items():
        total_count1 = 0
        total_count2 = 0
        for cnt in dataItem.sell_disbute:
            total_count1 += cnt
        for cnt in dataItem.buy_disbute:
            total_count2 += cnt
        assert total_count1 == total_count2
        assert total_count1 > 0
        print(f"\n\nk:%6d, " % (kValue))
        print(f" 卖方价格分布:")
        for encode in range(0, len(dataItem.sell_disbute)):
            occurtRate = 100 * dataItem.sell_disbute[encode] / total_count1
            print(f" {pctEncoder.descriptEncdoe(encode)}:%.2f%%" %
                  (occurtRate))
        print(f" 买方价格分布:")
        for encode in range(0, len(dataItem.buy_disbute)):
            occurtRate = 100 * dataItem.buy_disbute[encode] / total_count1
            print(f" {pctEncoder.descriptEncdoe(encode)}:%.2f%%" %
                  (occurtRate))
class More_detail_KPattern_skip1_predit2(Find_KPattern_skip1_predit2):
    """Variant of Find_KPattern_skip1_predit2 restricted to a whitelist of
    K-pattern values, additionally tracking per-pattern sell/buy pct
    distributions and the share of trading days producing matches."""

    def __init__(self, limit_close_pct=1, kPatters: [] = None):
        # NOTE(review): iterating `kPatters` below raises TypeError when the
        # default None is used — callers must pass a list; confirm intent.
        Find_KPattern_skip1_predit2.__init__(self,
                                             limit_close_pct=limit_close_pct)
        self.kPatters = kPatters
        self.pct_split = [-7, -5, -3, -1.0, 0, 1, 3, 5, 7]
        self.pctEncoder = FloatEncoder(self.pct_split)
        self.kPattersMap = {}  # whitelist membership map for O(1) lookups
        self.allTradyDayCount = 0  # bars seen for the current code
        self.allTradeDay = 0  # max bars seen over all codes
        self.occurDayMap = {}  # day-key -> True for days with a match
        for value in kPatters:
            self.kPattersMap[value] = True
        pass

    def onCreate(self):
        """Reset per-run statistics."""
        super().onCreate()
        self.occurDayMap.clear()
        self.allTradyDayCount = 0
        self.allTradeDay = 0

    def onStart(self, code: str) -> bool:
        """Reset the per-code day counter."""
        self.allTradyDayCount = 0
        return Find_KPattern_skip1_predit2.onStart(self, code)

    def onEnd(self, code: str):
        """Track the longest per-code trading-day span seen so far."""
        Find_KPattern_skip1_predit2.onEnd(self, code)
        self.allTradeDay = max(self.allTradeDay, self.allTradyDayCount)

    def collect(self, bar: BarData) -> TraceData:
        """Delegate collection, then drop traces whose pattern is not whitelisted."""
        traceData = Find_KPattern_skip1_predit2.collect(self, bar)
        self.allTradyDayCount += 1
        if not traceData is None:
            kPatternValue = traceData.kPatternValue
            if self.kPattersMap.get(kPatternValue) is None:
                # Filter out non-whitelisted patterns.
                return None
        return traceData

    def newCountData(self) -> CountData:
        """Extend the base CountData with sell/buy pct histograms."""
        data = Find_KPattern_skip1_predit2.newCountData(self)
        data.sell_disbute = np.zeros(self.pctEncoder.mask())  # sell-side dist
        data.buy_disbute = np.zeros(self.pctEncoder.mask())  # buy-side dist
        return data

    def doWantedTraceData(self, traceData: Skip1_Predict2_TraceData,
                          countData: CountData):
        """Record the trace's sell/buy pct in the histograms and mark its day."""
        Find_KPattern_skip1_predit2.doWantedTraceData(self, traceData,
                                                      countData)
        sell_pct = traceData.sell_pct
        buy_pct = traceData.buy_pct
        # Accumulate the buy/sell side distributions.
        countData.buy_disbute[self.pctEncoder.encode(buy_pct)] += 1
        countData.sell_disbute[self.pctEncoder.encode(sell_pct)] += 1
        occurBar = traceData.occurBar
        # Unique-ish key per calendar day.
        dayKey = occurBar.datetime.year * 13 * 35 + \
            occurBar.datetime.month * 13 + occurBar.datetime.day
        self.occurDayMap[dayKey] = True
        pass

    def onDestroy(self):
        """Print the collected per-pattern distributions (when enabled)."""
        Find_KPattern_skip1_predit2.onDestroy(self)
        if not self.print_on_destroy:
            return
        print(f"所有交易日中,有意义的k线形态出现占比:%.2f%%,allTradeDay = { self.allTradeDay}"
              % (100 * len(self.occurDayMap) / self.allTradeDay))
        for kValue, dataItem in self.dataSet.items():
            total_count1 = 0
            total_count2 = 0
            for cnt in dataItem.sell_disbute:
                total_count1 += cnt
            for cnt in dataItem.buy_disbute:
                total_count2 += cnt
            assert total_count1 == total_count2
            assert total_count1 > 0
            print(f"k线形态值:%6d, " % (kValue))
            print(f" 卖方价格分布:")
            info = ""
            for encode in range(0, len(dataItem.sell_disbute)):
                occurtRate = 100 * dataItem.sell_disbute[encode] / total_count1
                info += f"{self.pctEncoder.descriptEncdoe(encode)}:%.2f%%," % (
                    occurtRate)
            print(f" {info}")
            print(f" 买方价格分布:")
            info = ""
            for encode in range(0, len(dataItem.buy_disbute)):
                occurtRate = 100 * dataItem.buy_disbute[encode] / total_count1
                info += f"{self.pctEncoder.descriptEncdoe(encode)}:%.2f%%," % (
                    occurtRate)
            print(f" {info}")
def __init__(self):
    """Set up 3-slot rolling windows and the KDJ bucket encoder."""
    # Newest-last windows, None until warmed up.
    self.lasted3Bar = np.array([None] * 3)
    self.lasted3BarKdj = np.array([None] * 3)
    # Bucket boundaries for KDJ values.
    self.kdjEncoder = FloatEncoder([15, 30, 45, 60, 75, 90])
    self.sw = SWImpl()
def getSellFloatEncoder(self) -> FloatEncoder:
    """Return a FloatEncoder built over this object's sell-side split points."""
    splits = self.sellSplits
    return FloatEncoder(splits)
def printKPatterMoreDetail(kPatters=None):
    """Trace every whitelisted k-pattern over the SW2 daily bars and print, per
    pattern, the buy/sell percent distribution of the wanted traces.

    Bug fixed: the old ``InnerData`` declared ``sell_disbute``/``buy_disbute``
    as *class* attributes, so all patterns shared (and corrupted) the same two
    numpy arrays and every printed distribution was an identical aggregate.
    They are now created per instance. The mutable default argument was also
    replaced by the None-sentinel idiom (same default values).

    :param kPatters: k-pattern values to trace; None selects the built-in list.
    """
    if kPatters is None:
        kPatters = [535, 359, 1239, 1415, 1072, 712, 1412, 1240, 1413, 888,
                    2823, 706, 1414, 1064]
    sw = SWImpl()
    lists = sw.getSW2List()
    start = datetime(2014, 5, 1)
    end = datetime(2020, 8, 17)
    # Effective split; earlier candidate splits kept for reference:
    #   [-7, -5, -3, -1.5, -0.5, 0.5, 1.5, 3, 5, 7]
    #   [-7, -5, -3, -1.0, 0, 1, 3, 5, 7]
    pct_split = [2]
    pctEncoder = FloatEncoder(pct_split)
    kPattersMap = {value: True for value in kPatters}

    class InnerData(object):
        """Per-pattern accumulator of buy/sell percent histograms."""

        def __init__(self, kValue: int):
            self.kValue = kValue
            # Instance-level arrays: each pattern owns its own histograms.
            self.sell_disbute = np.zeros(pctEncoder.mask())  # seller-side distribution
            self.buy_disbute = np.zeros(pctEncoder.mask())   # buyer-side distribution

    dataSet = {}
    occurDayMap = {}
    allTrayDay = 1  # start at 1 so the final ratio can never divide by zero
    for code in lists:
        barList = sw.getSW2Daily(code, start, end)
        indicator = Indicator(40)
        traceItems: ['TraceIn3DayItem'] = []
        allTrayDay = max(allTrayDay, len(barList))
        for bar in barList:
            # Advance every live trace with the new bar.
            toDeleteList = []
            for traceItem in traceItems:
                traceItem.onTraceBar(bar)
                if traceItem.isFinished():
                    toDeleteList.append(traceItem)
                    if traceItem.isWanted():
                        # Remember the day the pattern occurred.
                        occurBar = traceItem.firstBar
                        dayKey = occurBar.datetime.year * 13 * 35 + occurBar.datetime.month * 13 + occurBar.datetime.day
                        occurDayMap[dayKey] = True
                        # Fold this trace into its pattern's statistics.
                        innerData: InnerData = dataSet.get(traceItem.kPattern)
                        if innerData is None:
                            innerData = InnerData(traceItem.kPattern)
                            dataSet[traceItem.kPattern] = innerData
                        innerData.buy_disbute[pctEncoder.encode(traceItem.current_buy_pct)] += 1
                        innerData.sell_disbute[pctEncoder.encode(traceItem.current_sell_pct)] += 1
            for traceItem in toDeleteList:
                traceItems.remove(traceItem)
            # Start a new trace whenever a whitelisted pattern appears.
            indicator.update_bar(bar)
            kEncodeValue = KPattern.encode2KAgo1(indicator)
            if kEncodeValue is None or kPattersMap.get(kEncodeValue) is None:
                continue
            traceItems.append(TraceIn3DayItem(kEncodeValue, bar))

    print(f"所有交易日中,有意义的k线形态出现占比:%.2f%%" % (100 * len(occurDayMap) / allTrayDay))
    for kValue, dataItem in dataSet.items():
        total_count1 = 0
        total_count2 = 0
        for cnt in dataItem.sell_disbute:
            total_count1 += cnt
        for cnt in dataItem.buy_disbute:
            total_count2 += cnt
        # Both histograms must hold exactly one entry per wanted trace.
        assert total_count1 == total_count2
        assert total_count1 > 0
        print(f"\n\nk:%6d, " % (kValue))
        print(f" 卖方价格分布:")
        for encode in range(0, len(dataItem.sell_disbute)):
            occurtRate = 100 * dataItem.sell_disbute[encode] / total_count1
            print(f" {pctEncoder.descriptEncdoe(encode)}:%.2f%%" % (occurtRate))
        print(f" 买方价格分布:")
        for encode in range(0, len(dataItem.buy_disbute)):
            occurtRate = 100 * dataItem.buy_disbute[encode] / total_count1
            print(f" {pctEncoder.descriptEncdoe(encode)}:%.2f%%" % (occurtRate))
class MyPattherCollector(KPatternCollector):
    """Collects training-sample rows for a fixed whitelist of k-pattern values
    and dumps them to an Excel file when destroyed."""

    # When True, only bars forming a whitelisted pattern start a trace.
    collectKPatternOnly = True
    # Whitelisted pattern values. NOTE: this class attribute does not shadow
    # the imported KPattern module inside methods (class scope is not part of
    # method name resolution).
    KPattern: list = [712]
    pct_split = [-7, -5, -3, -1.5, -0.5, 0.5, 1.5, 3, 5, 7]
    # pct_split = [-7, -5, -3, -1.0, 0, 1, 3, 5, 7]
    # pct_split = [-0.5,0.5]
    pctEncoder = FloatEncoder(pct_split)

    def __init__(self):
        # Set-like lookup: pattern value -> True.
        self.kPattersMap = {value: True for value in self.KPattern}
        self.sw = SWImpl()
        self.dataSet = []  # one row per wanted trace; columns listed in onDestroy

    def onStart(self, code: str) -> bool:
        self.indicator = Indicator(40)
        self.traceCode = code
        self.traceName = self.sw.getSw2Name(code)
        return True

    def checkIfTrace(self, newBar: BarData) -> TraceIn3DayItem:
        """Return a new trace item if this bar forms a (whitelisted) k-pattern,
        otherwise None."""
        self.indicator.update_bar(newBar)
        kEncodeValue = KPattern.encode2KAgo1(self.indicator)
        if kEncodeValue is None:
            return None
        if self.collectKPatternOnly and self.kPattersMap.get(kEncodeValue) is None:
            return None
        return TraceIn3DayItem(kEncodeValue, newBar)

    def onTraceFinish(self, traceItem: TraceIn3DayItem):
        if traceItem.isWanted():
            # Collect one sample row. ('fisrt_sell_pct' is the attribute name
            # actually exposed by TraceIn3DayItem — do not "fix" it here.)
            self.dataSet.append([
                self.traceCode,
                self.traceName,
                traceItem.kPattern,
                traceItem.first_buy_pct,
                traceItem.fisrt_sell_pct,
                traceItem.current_sell_pct,
                traceItem.current_buy_pct,
            ])

    def onDestroy(self):
        """Write the collected rows to files/sw_train_data_sample.xlsx."""
        import pandas as pd
        columns = [
            "code", "name", "kPattern", "buy_price", "sell_price",
            "label_sell_price", "label_buy_price"
        ]
        frame = pd.DataFrame(self.dataSet, columns=columns)
        # Context manager saves and closes the workbook even on error; the old
        # writer.save() + writer.close() pair is deprecated/removed in modern
        # pandas and leaked the handle on exception.
        with pd.ExcelWriter('files/sw_train_data_sample.xlsx') as writer:
            frame.to_excel(writer, sheet_name="sample", index=False)
        print(f"dataSize = {len(self.dataSet)}")
def getPctEncoder2(self) -> FloatEncoder:
    """Return a FloatEncoder with half-step percent boundaries, clamped to [-10, 10]."""
    half_step_splits = [
        -7.5, -5.5, -3.5, -2.5, -1.5, -0.5,
        0.5, 1.5, 2.5, 3.5, 5.5, 7.5,
    ]
    return FloatEncoder(half_step_splits, minValue=-10, maxValue=10)
def getPctEncoder1(self) -> FloatEncoder:
    """Return a FloatEncoder with whole-number percent boundaries, clamped to [-10, 10]."""
    whole_number_splits = [-7, -5, -3, -2, -1, 0, 1, 2, 3, 5, 7]
    return FloatEncoder(whole_number_splits, minValue=-10, maxValue=10)
class EngineModel2KAlgo2(CoreEngineModel):
    """Collect model: encodes a k-pattern (2 bars ago) combined with bucketed
    KDJ (k, d) values into a Dimension, then tracks two follow-up predict bars.
    """

    def __init__(self):
        # Rolling windows of the 3 most recent bars and their [k, d, j] values.
        self.lasted3Bar = np.array([None, None, None])
        self.lasted3BarKdj = np.array([None, None, None])
        # Buckets KDJ readings at 15-point boundaries.
        self.kdjEncoder = FloatEncoder([15, 30, 45, 60, 75, 90])
        self.sw = SWImpl()

    def onCollectStart(self, code: str) -> bool:
        """Reset per-code state before collecting a new bar series."""
        from earnmi.chart.Indicator import Indicator
        self.indicator = Indicator(40)
        self.code = code
        return True

    def onCollectTrace(self, bar: BarData) -> CollectData:
        """Feed one bar; return a fresh CollectData when a k-pattern value can
        be computed, otherwise None.

        The dimension value packs three codes together:
        kPatternValue * mask**2 + encode(k) * mask + encode(d).
        """
        self.indicator.update_bar(bar)
        # Shift the rolling windows left by one slot, then append the new bar/KDJ.
        self.lasted3Bar[:-1] = self.lasted3Bar[1:]
        self.lasted3BarKdj[:-1] = self.lasted3BarKdj[1:]
        k, d, j = self.indicator.kdj(fast_period=9, slow_period=3)
        self.lasted3Bar[-1] = bar
        self.lasted3BarKdj[-1] = [k, d, j]
        if self.indicator.count >= 20:  # need enough history before encoding
            from earnmi.chart.KPattern import KPattern
            kPatternValue = KPattern.encode2KAgo1(self.indicator)
            if not kPatternValue is None:
                # Mix the KDJ buckets for k and d into the pattern value.
                _kdj_mask = self.kdjEncoder.mask()
                kPatternValue = kPatternValue * _kdj_mask * _kdj_mask + self.kdjEncoder.encode(k) * _kdj_mask + self.kdjEncoder.encode(d)
                dimen = Dimension(type=TYPE_2KAGO1, value=kPatternValue)
                collectData = CollectData(dimen=dimen)
                # Seed with the previous bar and the current (pattern) bar.
                collectData.occurBars.append(self.lasted3Bar[-2])
                collectData.occurBars.append(self.lasted3Bar[-1])
                collectData.occurKdj.append(self.lasted3BarKdj[-2])
                collectData.occurKdj.append(self.lasted3BarKdj[-1])
                return collectData
        return None

    def onCollect(self, data: CollectData, newBar: BarData) -> bool:
        """Accumulate follow-up bars; collection is finished (returns True)
        once two predict bars have been stored."""
        if len(data.occurBars) < 3:
            # Third occur bar is the "skip" bar that follows the pattern bar.
            data.occurBars.append(self.lasted3Bar[-1])
            data.occurKdj.append(self.lasted3BarKdj[-1])
        else:
            data.predictBars.append(newBar)
        size = len(data.predictBars)
        return size >= 2

    # NOTE(review): decorated @abstractmethod although fully implemented —
    # looks like a copy-paste leftover; confirm intent before removing.
    @abstractmethod
    def getYLabelPrice(self, cData: CollectData) -> [float, float, float]:
        """Return (sell_price, buy_price) labels derived from the predict bars:
        the max/min of each bar's (high+close)/2 and (low+close)/2 midpoints.
        Returns (None, None) when there are no predict bars."""
        bars: ['BarData'] = cData.predictBars
        if len(bars) > 0:
            sell_price = -9999999999  # sentinel: below any real price
            buy_price = -sell_price   # sentinel: above any real price
            for bar in bars:
                sell_price = max((bar.high_price + bar.close_price) / 2, sell_price)
                buy_price = min((bar.low_price + bar.close_price) / 2, buy_price)
            return sell_price, buy_price
        return None, None

    def getYBasePrice(self, cData: CollectData) -> float:
        # Base price is the close of the bar on which the pattern occurred.
        return cData.occurBars[-2].close_price

    def generateXFeature(self, cData: CollectData) -> []:
        # Feature generation requires all 3 occur bars; otherwise no feature.
        if (len(cData.occurBars) < 3):
            return None
        occurBar = cData.occurBars[-2]
        skipBar = cData.occurBars[-1]
        kdj = cData.occurKdj[-1]
        # Percent move of the skip bar's high/low midpoints vs. the occur close.
        sell_pct = 100 * ((skipBar.high_price + skipBar.close_price) / 2 - occurBar.close_price) / occurBar.close_price
        buy_pct = 100 * ((skipBar.low_price + skipBar.close_price) / 2 - occurBar.close_price) / occurBar.close_price

        def set_0_between_100(x):
            # Clamp x to the range [0, 100].
            if x > 100:
                return 100
            if x < 0:
                return 0
            return x

        def percent_to_one(x):
            # Quantize the percent to 3 decimals and scale it down by 10.
            return int(x * 100) / 1000.0

        data = []
        data.append(percent_to_one(buy_pct))
        data.append(percent_to_one(sell_pct))
        data.append(set_0_between_100(kdj[0]) / 100)
        data.append(set_0_between_100(kdj[2]) / 100)
        return data
if abs(dif_value) > 20: print(f"dif:{dif_value},code={code},bar = {bar}") if abs(dea_value) > 20: print(f"dea:{dea_value},code={code},bar = {bar}") bars, code = souces.nextBars() dif_list = np.array(dif_list) dea_list = np.array(dea_list) def_min,def_max = [dea_list.min(),dea_list.max()] dif_min,dif_max = [dif_list.min(),dif_list.max()] print(f"dif_max:{dif_max},dif_min:{dif_min},count:{len(dif_list)}") print(f"dea_max:{def_max},dea_min:{def_min}") N = 5 dif_spli_list = [] for i in range(0,N+1): dif_spli_list.append(dif_min + i * (dif_max - dif_min) / N) dif_spli_list = [-10,-5, -2.5,2.5,5,10] dea_spli_list = [] for i in range(0,N+1): dea_spli_list.append(def_min + i * (def_max - def_min) / N) dea_spli_list = [-10,-5,-2.5,2.5,5,10] dif_encoder = FloatEncoder(dif_spli_list) dea_encoder = FloatEncoder(dea_spli_list) print(f"dif分布:{FloatRange.toStr(dif_encoder.computeValueDisbustion(dif_list),dif_encoder)}") print(f"dea分布:{FloatRange.toStr(dea_encoder.computeValueDisbustion(dea_list),dea_encoder)}") pass