def _genSignalIDs(self, idt, original_ids, signal_type):
    """Run the configured filtration levels and return the surviving IDs.

    Levels are read as ``self["第<i>层"]`` for ``i`` in
    ``range(self.FiltrationLevel)``. A level is skipped unless its
    ``SignalType`` equals ``signal_type``. For each applied level:

    * if ``IDFilter`` is set, the candidates are intersected with
      ``self._FT.getFilteredID(...)`` and sorted;
    * if ``GroupFactors`` is set, the candidates are split by every
      combination of the group-factor values (null values form their own
      group) and ``self._filtrateID`` is called once per combination;
      otherwise ``self._filtrateID`` is called once on all candidates.

    Parameters
    ----------
    idt : passed through to the factor table and to ``_filtrateID``.
    original_ids : initial candidate IDs.
    signal_type : only levels with this ``SignalType`` are applied.

    Returns
    -------
    The filtered IDs, or ``[]`` when no level matched ``signal_type``.
    """
    candidate_ids = original_ids
    applied_levels = 0
    for level in range(self.FiltrationLevel):
        level_args = self["第" + str(level) + "层"]
        if level_args.SignalType != signal_type:
            continue
        if level_args.IDFilter:
            filtered = self._FT.getFilteredID(idt, id_filter_str=level_args.IDFilter)
            candidate_ids = sorted(set(filtered).intersection(set(candidate_ids)))
        if not level_args.GroupFactors:
            # No grouping: filter the whole candidate set in one go.
            candidate_ids = self._filtrateID(idt, candidate_ids, level_args)
        else:
            group_data = self._FT.readData(
                dts=[idt],
                ids=candidate_ids,
                factor_names=list(level_args.GroupFactors),
            ).iloc[:, 0, :]
            if group_data.shape[0] > 0:
                # Normalise every kind of missing value to NaN.
                group_data[pd.isnull(group_data)] = np.nan
            values_per_factor = [
                group_data[factor].unique().tolist()
                for factor in level_args.GroupFactors
            ]
            candidate_ids = []
            for combo in CartesianProduct(values_per_factor):
                mask = pd.Series(True, index=group_data.index)
                for pos, value in enumerate(combo):
                    column = group_data[level_args.GroupFactors[pos]]
                    # A null group value selects the rows whose cell is null.
                    mask = (mask & (column == value)) if pd.notnull(value) else (mask & pd.isnull(column))
                group_ids = self._filtrateID(idt, group_data[mask].index.tolist(), level_args)
                candidate_ids.extend(group_ids)
        applied_levels += 1
    return candidate_ids if applied_levels > 0 else []
def __QS_prepareRawData__(self, factor_names, ids, dts, args=None):
    """Query the backing table and return the raw rows for ``factor_names``.

    The rows of ``self._FactorDB._FactorInfo`` for ``self.Name`` map field
    names to database field names. Fields whose ``"Supplementary"`` entry is
    not null are used as query conditions: the value of each is taken from
    ``args`` if present, otherwise from ``self[<field>]``. The special value
    ``"All"`` expands to every value listed in the ``"Supplementary"``
    string after its first comma-separated token; one query is issued per
    combination of such values and the results are concatenated.

    Parameters
    ----------
    factor_names : list of factor names to fetch.
    ids, dts : accepted for interface compatibility; not used here.
    args : optional dict overriding condition values. ``None`` means no
        overrides.

    Returns
    -------
    DataFrame with columns ``["ID"] + factor_names``; the ``"ID"`` column
    has been converted with ``self._FactorDB.DBID2ID``.
    """
    # Avoid a shared mutable default argument.
    args = {} if args is None else args
    FactorInfo = self._FactorDB._FactorInfo.loc[self.Name]
    Fields = [self._IDField] + factor_names
    DBFields = FactorInfo["DBFieldName"].loc[Fields].tolist()
    DBTableName = self._FactorDB._TableInfo.loc[self.Name, "DBTableName"]
    FieldStr = ",".join(DBFields)
    ConditionField = FactorInfo[pd.notnull(FactorInfo["Supplementary"])]
    # Conditions with one fixed value vs. conditions expanded over all values.
    SingleCondition, MultiCondition = {}, OrderedDict()
    for i, iCondition in enumerate(ConditionField.index):
        iConditionValue = args.get(iCondition, self[iCondition])
        iDBField = ConditionField["DBFieldName"].iloc[i]
        if iConditionValue == "All":
            MultiCondition[iDBField] = ConditionField["Supplementary"].iloc[i].split(",")[1:]
        else:
            SingleCondition[iDBField] = iConditionValue
    if MultiCondition:
        MultiKeys = list(MultiCondition.keys())
        Frames = []
        for iValues in CartesianProduct(list(MultiCondition.values())):
            SingleCondition.update(dict(zip(MultiKeys, iValues)))
            Frames.append(self._FactorDB._ts.query(DBTableName, fields=FieldStr, **SingleCondition))
        if Frames:
            # One concat instead of repeated DataFrame.append (removed in
            # pandas 2.0, and quadratic when called in a loop).
            RawData = pd.concat(Frames)
        else:
            # No combination to query: return an empty, correctly shaped frame.
            RawData = pd.DataFrame(columns=DBFields)
    else:
        # With no condition fields SingleCondition is empty, so this is the
        # plain unconditional query.
        RawData = self._FactorDB._ts.query(DBTableName, fields=FieldStr, **SingleCondition)
    RawData = RawData.loc[:, DBFields]
    RawData.columns = ["ID"] + factor_names
    RawData["ID"] = self._FactorDB.DBID2ID(RawData["ID"])
    return RawData
def _allocateWeight(self, idt, ids, original_ids, args):
    """Turn the selected ``ids`` into a weight Series.

    Without group factors (``args.GroupFactors`` empty) the selected IDs are
    weighted equally (``args.WeightFactor == '等权'``) or in proportion to
    the weight factor. Missing weights are dropped when
    ``args.WeightMiss == '舍弃'`` and otherwise filled with the mean weight.

    With group factors, every combination of group-factor values observed
    over ``original_ids`` forms a group. Each group receives a share
    (equal when ``args.GroupWeight == '等权'``, otherwise proportional to
    the summed ``args.GroupWeight`` factor of its members) which is then
    split among the selected IDs of that group according to
    ``args.WeightFactor``. When a group has no selected ID,
    ``args.GroupMiss == '全配'`` allocates to all of the group's original
    IDs and ``'忽略'`` skips the group.

    Parameters
    ----------
    idt : passed to ``self._FT.readData`` as the single entry of ``dts``.
    ids : the selected IDs.
    original_ids : the IDs used to define groups and group weights.
    args : object exposing ``GroupFactors``, ``WeightFactor``,
        ``WeightMiss``, ``GroupWeight`` and ``GroupMiss``.

    Returns
    -------
    pandas Series of weights indexed by ID. It is empty when ``ids`` is
    empty in the ungrouped equal-weight case.
    """
    def _groupMask(data, group):
        # Boolean mask of the rows of `data` whose group-factor values equal
        # `group`; a null entry in `group` matches null cells.
        mask = None
        for factor, value in zip(args.GroupFactors, group):
            column = data[factor]
            hit = (column == value) if pd.notnull(value) else pd.isnull(column)
            mask = hit if mask is None else (mask & hit)
        return mask

    nID = len(ids)
    if not args.GroupFactors:  # no group (category) factors
        if args.WeightFactor == '等权':
            if nID == 0:
                # Nothing selected: avoid 1/0 and return an empty signal.
                return pd.Series([], index=ids, dtype='float')
            return pd.Series(1 / nID, index=ids)
        WeightData = self._FT.readData(factor_names=[args.WeightFactor], dts=[idt], ids=ids).iloc[0, 0, :]
        if args.WeightMiss == '舍弃':
            WeightData = WeightData[pd.notnull(WeightData)]
        else:
            # Fill the *missing* weights with the mean. The previous mask
            # (notnull) overwrote the valid weights and kept the NaNs.
            WeightData[pd.isnull(WeightData)] = WeightData.mean()
        return WeightData / WeightData.sum()

    GroupData = self._FT.readData(factor_names=args.GroupFactors, dts=[idt], ids=original_ids).iloc[:, 0, :]
    # Normalise every kind of missing value to NaN.
    GroupData[pd.isnull(GroupData)] = np.nan
    AllGroups = CartesianProduct([GroupData[iGroup].unique().tolist() for iGroup in args.GroupFactors])
    nGroup = len(AllGroups)

    # Share of the total weight given to each group.
    if args.GroupWeight == '等权':
        GroupWeight = pd.Series(np.ones(nGroup) / nGroup, dtype='float')
    else:
        GroupWeight = pd.Series(index=np.arange(nGroup), dtype='float')
        GroupWeightData = self._FT.readData(factor_names=[args.GroupWeight], dts=[idt], ids=original_ids).iloc[0, 0, :]
        for i, iGroup in enumerate(AllGroups):
            GroupWeight.iloc[i] = GroupWeightData[_groupMask(GroupData, iGroup)].sum()
        GroupWeight[pd.isnull(GroupWeight)] = 0
        GroupTotalWeight = GroupWeight.sum()
        if GroupTotalWeight != 0:
            GroupWeight = GroupWeight / GroupTotalWeight

    # Per-ID weights inside a group.
    if args.WeightFactor == '等权':
        WeightData = pd.Series(1.0, index=original_ids)
    else:
        WeightData = self._FT.readData(factor_names=[args.WeightFactor], dts=[idt], ids=original_ids).iloc[0, 0, :]

    SelectedGroupData = GroupData.loc[ids]
    Pieces = []
    for i, iGroup in enumerate(AllGroups):
        iIDs = SelectedGroupData[_groupMask(SelectedGroupData, iGroup)].index.tolist()
        if not iIDs:
            if args.GroupMiss == '全配':
                # No selected ID in this group: use all of its original IDs.
                iIDs = GroupData[_groupMask(GroupData, iGroup)].index.tolist()
            elif args.GroupMiss == '忽略':
                continue
        iSignal = WeightData.loc[iIDs]
        iSignalWeight = iSignal.sum()
        if iSignalWeight != 0:
            iSignal = iSignal / iSignalWeight * GroupWeight.iloc[i]
        else:
            iSignal = iSignal * 0.0
        if args.WeightMiss == '填充均值':
            iSignal[pd.isnull(iSignal)] = iSignal.mean()
        Pieces.append(iSignal[pd.notnull(iSignal) & (iSignal != 0)])
    # Single concat instead of repeated Series.append (removed in pandas 2.0).
    NewSignal = pd.concat(Pieces) if Pieces else pd.Series(dtype='float')
    return NewSignal / NewSignal.sum()
def changeMultiClass2SingleClass(multi_class, sep=None):
    """Collapse several class columns into a single label per row.

    Every combination of the distinct values found in each column of
    ``multi_class`` (as produced by ``CartesianProduct``) is given one
    label: the combination's position as a string when ``sep`` is None,
    otherwise its values converted to ``str`` and joined with ``sep``.
    A null value in a combination matches null cells.

    Parameters
    ----------
    multi_class : 2-D array indexed as ``multi_class[:, j]``.
    sep : optional separator used to build the labels.

    Returns
    -------
    tuple ``(labels, mapping)`` where ``labels`` is an object array with one
    label per row and ``mapping`` maps each label to its combination.
    """
    row_count = multi_class.shape[0]
    values_per_column = [
        pd.unique(multi_class[:, col]).tolist()
        for col in range(multi_class.shape[1])
    ]
    combos = CartesianProduct(values_per_column)
    labels = np.empty(shape=(row_count,), dtype="O")
    label_to_combo = {}
    for position, combo in enumerate(combos):
        label = str(position) if sep is None else sep.join(map(str, combo))
        row_mask = np.array([True] * row_count)
        for col, value in enumerate(combo):
            column = multi_class[:, col]
            if pd.notnull(value):
                row_mask = row_mask & (column == value)
            else:
                # Null class value: select the rows whose cell is null.
                row_mask = row_mask & pd.isnull(column)
        labels[row_mask] = label
        label_to_combo[label] = combo
    return (labels, label_to_combo)