def __init__(self, messages=None):
    """Keep the messages and create the helper objects used by this instance.

    Args:
        messages: Messages to work on (default ``None``). The same object
            is also passed to ``WholeFieldTypeInfer``.
    """
    self.messages = messages
    # The type-inference helper receives the very same message collection.
    self.wordTypeInfer = WholeFieldTypeInfer(messages)
    # Converters, message splitter, data tuning and symbol-tree helpers.
    self.cvter = Converter()
    self.wcvter = word_convert()
    self.msgSplt = MsgSpliter()
    self.dataTuning = DataTuning()
    self.icsSymTree = IcsSymbolToTree()
def __init__(self):
    """Initialise the parent class, then create every helper object.

    Helpers are created in a fixed order; ``converter`` and ``cvt`` are two
    separate ``Converter`` instances.
    """
    super().__init__()

    # Conversion / splitting / storage helpers.
    self.converter = Converter()
    self.msgSpliter = MsgSpliter()
    self.redis_dealer = redis_deal()

    # Voting and measuring helpers.
    self.splt = splitter()
    self.desiner = Desiner()
    self.msAb = MeasureAb()

    # Second converter instance and the data-tuning helper.
    self.cvt = Converter()
    self.dataTuning = DataTuning()
class TextParseLogic:
    """Delimiter-based splitting helpers for text messages."""

    def __init__(self):
        """Set the logic name and create the message splitter helper."""
        self.name = 'parser'
        self.msgSpliter = MsgSpliter()

    def split(self, messages, delimiter):
        """Split every message on ``delimiter``.

        Returns:
            A list holding, for each message, the list of its parts.
        """
        return [message.split(delimiter) for message in messages]

    def ConvertDataToMessage(self, messages, delimeter, h=0):
        """Build one ``TextModel`` per message.

        Each model is created from the raw message, its parts after
        splitting on ``delimeter``, the message index and ``h``.

        Returns:
            The list of created ``TextModel`` objects, in message order.
        """
        parts_per_message = self.split(messages, delimeter)
        return [
            TextModel(raw, parts, index, h)
            for index, (raw, parts) in enumerate(zip(messages, parts_per_message))
        ]

    def spltMsgs(self, messages, delimiter):
        """Split the messages and produce a fixed six-entry header row.

        Returns:
            ``(headers, rows)`` where ``rows`` holds the string parts of
            every message and ``headers`` alternates the delimiter (even
            positions) with ``'field<position>'`` (odd positions).
        """
        rows = [
            [str(part) for part in message.split(delimiter)]
            for message in messages
        ]
        headers = [
            'field' + str(position) if position % 2 != 0 else str(delimiter)
            for position in range(6)
        ]
        return headers, rows

    def spltMsgsSimple(self, messages, delimiter, maxRange=150):
        """Split on ``delimiter`` and hand the parts to ``splitTextMsgs``.

        Returns:
            Whatever ``self.msgSpliter.splitTextMsgs`` returns for the split
            messages, the delimiter and ``maxRange``.
        """
        parts_per_message = [message.split(delimiter) for message in messages]
        return self.msgSpliter.splitTextMsgs(parts_per_message, delimiter, maxRange)
def __init__(self):
    """Set the logic name to ``'parser'`` and create the message splitter."""
    # Fixed identifier of this logic object.
    self.name = 'parser'
    # Helper that performs the actual message splitting.
    self.msgSpliter = MsgSpliter()
def __init__(self):
    """Initialise the parent class and create the three helper objects."""
    super().__init__()

    # Converter, message splitter and redis access helper, in that order.
    self.converter = Converter()
    self.msgSpliter = MsgSpliter()
    self.redis_dealer = redis_deal()
class GvoterLogic(splitter):
    """Compute split boundaries shared by all messages and split with them."""

    def __init__(self):
        """Initialise the parent splitter and create the helper objects."""
        super().__init__()
        self.converter = Converter()
        self.msgSpliter = MsgSpliter()
        self.redis_dealer = redis_deal()

    def getGVotes(self, configParas, messages):
        """Collect the frequency and entry votes and merge each set.

        Returns:
            ``(freGVotes, entryGVotes)``: the results of
            ``Converter.MergeListDics`` for both kinds of votes.
        """
        freVotes = self.getFreVotes(configParas, messages)
        entryVotes = self.getEntryVotes(configParas, messages)
        freGVotes = self.converter.MergeListDics(freVotes)
        entryGVotes = self.converter.MergeListDics(entryVotes)
        return freGVotes, entryGVotes

    def getBoundaries(self, configParas, gveConfigParas, messages):
        """Vote on the merged votes and merge both boundary lists.

        Args:
            configParas: Passed through to the vote collection.
            gveConfigParas: Mapping providing ``'diffMeasure'``,
                ``'vWayFre'``, ``'vWayEntry'``, ``'T'`` and ``'r'``.
            messages: Messages to analyse.

        Returns:
            The result of ``Converter.MergeLists`` applied to the
            frequency boundaries and the entry boundaries.
        """
        freGVotes, entryGVotes = self.getGVotes(configParas, messages)
        desiner = Desiner()
        voteParas = {
            'diff_measure': gveConfigParas['diffMeasure'],
            'vWay': gveConfigParas['vWayFre'],
            'T': gveConfigParas['T'],
            'r': gveConfigParas['r'],
        }
        freBoundaries = desiner.VoteSingleByDicParas(voteParas, freGVotes)
        # Same parameter dict, only the voting way differs for entry votes.
        voteParas['vWay'] = gveConfigParas['vWayEntry']
        entryBoundaries = desiner.VoteSingleByDicParas(voteParas, entryGVotes)
        # Reuse the instance converter instead of building a throwaway one.
        return self.converter.MergeLists(freBoundaries, entryBoundaries)

    def getCommonRange(self, messages):
        """Return ``min(23, shortest message length + 2)``.

        Raises:
            ValueError: If ``messages`` is empty.
        """
        shortest = min([len(message) for message in messages])
        return min(23, shortest + 2)

    def filterBoundaries(self, boundaries, cRange):
        """Return the leading boundaries that are smaller than ``cRange``.

        Scanning stops at the first boundary that is not smaller, so later
        entries are never looked at.
        """
        kept = []
        for boundary in boundaries:
            if not boundary < cRange:
                break
            kept.append(boundary)
        return kept

    def getGBoundaries(self, boundaries, messages):
        """Restrict ``boundaries`` to the range common to all messages."""
        cRange = self.getCommonRange(messages)
        return self.filterBoundaries(boundaries, cRange)

    def getSplitMessages(self, configParas, gveConfigParas, messages, FType='G'):
        """Return the common boundaries, using redis as a cache.

        The cache key is ``'<configParas.getUserPathDynamic()>_GSplit'``.
        The cached value is only read when ``FType == 'Y'`` and the key
        exists; otherwise the boundaries are computed and stored as JSON.

        Returns:
            The list of boundaries (decoded from JSON on a cache hit).
        """
        splitKey = '{}_{}'.format(configParas.getUserPathDynamic(), 'GSplit')
        if FType == 'Y' and self.redis_dealer.is_exist_key(splitKey):
            return json.loads(self.redis_dealer.read_from_redis(splitKey))

        boundaries = self.getBoundaries(configParas, gveConfigParas, messages)
        gBoundaries = self.getGBoundaries(boundaries, messages)
        # Only freshly computed boundaries are written to the cache.
        self.redis_dealer.insert_to_redis(splitKey, json.dumps(gBoundaries))
        return gBoundaries

    def splitMessages(self, configParas, gveConfigParas, messages, maxRange=15):
        """Split every message with the same common boundaries.

        Returns:
            Whatever ``self.msgSpliter.splitMessages`` returns.
        """
        gBoundaries = self.getSplitMessages(configParas, gveConfigParas, messages)
        # Every message shares the one boundary list.
        perMessageBoundaries = [gBoundaries] * len(messages)
        return self.msgSpliter.splitMessages(perMessageBoundaries, messages, maxRange)

    def splitFileMessages(self, filePath, messages, maxRange=15):
        """Split ``messages`` using generated parameters and a fixed user config.

        ``filePath`` is accepted but not used by this method.
        """
        gVeParas = GveConf.geneGveParas()
        uConfig = UserConfig('/home/wxw/data/ToolDatas/15895903730.10.222', '15895903730')
        return self.splitMessages(uConfig, gVeParas, messages, maxRange)
class FormatGeneLogic:
    """Infer field boundaries and field types for a set of messages."""

    def __init__(self, messages=None):
        """Keep the messages and create the helper objects."""
        self.messages = messages
        self.wordTypeInfer = WholeFieldTypeInfer(self.messages)
        self.cvter = Converter()
        self.wcvter = word_convert()
        self.msgSplt = MsgSpliter()
        self.dataTuning = DataTuning()
        self.icsSymTree = IcsSymbolToTree()

    def getRanges(self, messages):
        """Return ``min(23, shortest message length + 2)``.

        The shortest length is capped at 65536, which is also the value
        used when ``messages`` is empty.
        """
        shortest = min((len(message) for message in messages), default=65536)
        return min(23, min(shortest, 65536) + 2)

    def getMesFormat(self):
        """Placeholder; does nothing."""

    def sortWordsType(self, words):
        """Return the items of ``words`` sorted by the first element of each key."""
        return sorted(words.items(), key=lambda item: item[0][0])

    def getGFormat(self, congigParas, gVeparas):
        """Compute the boundaries and sorted word types for ``self.messages``.

        Returns:
            ``(boundaries, wordsType)``: the re-adjusted words converted
            back by ``itemtoborder`` and the sorted ``extractWords`` result.
        """
        voter = GvoterLogic()
        rawBoundaries = voter.getSplitMessages(congigParas, gVeparas,
                                               self.messages, FType='G')
        items = self.cvter.border2item(rawBoundaries)
        fRange = self.getRanges(self.messages)

        # Re-split and re-cluster the items, then use the resulting words.
        adjuster = ReAjustLogic(items, self.messages)
        adjuster.reSplit()
        adjuster.reCluster()
        words = adjuster.words

        wordsType = self.sortWordsType(
            self.wordTypeInfer.extractWords(words, fRange))
        return self.wcvter.itemtoborder(words), wordsType

    def getCFormat(self, configParas, gVeparas, msgs):
        """Compute the sorted word types for ``msgs``.

        With fewer than 10 messages no inference is attempted and the fixed
        result ``[((0, -1), 7)]`` is returned.
        """
        if len(msgs) < 10:
            return [((0, -1), 7)]

        voter = GvoterLogic()
        rawBoundaries = voter.getSplitMessages(configParas, gVeparas, msgs,
                                               FType='C')
        items = self.cvter.border2item(rawBoundaries)
        fRange = self.getRanges(msgs)
        items = self.cvter.filterB(items, fRange)

        adjuster = ReAjustLogic(items, msgs)
        adjuster.reSplit()
        adjuster.reCluster()

        typeInfer = WholeFieldTypeInfer(msgs)
        return self.sortWordsType(typeInfer.extractCWords(adjuster.words))

    def clsByFunc(self, los):
        """Group ``self.messages`` by the slice ``msg[los[0]:los[1]]``.

        Returns:
            A dict mapping each slice value to the list of message
            remainders ``msg[los[1]:]``, in message order.
        """
        start, stop = los[0], los[1]
        groups = {}
        for msg in self.messages:
            groups.setdefault(msg[start:stop], []).append(msg[stop:])
        return groups

    def GTreeGenerate(self, configParas, gVeparas):
        """Infer the overall format, then one format per message group.

        The first word whose type is ``0`` supplies the slice used to group
        the messages; every group is then replaced by its ``getCFormat``
        result.

        Returns:
            ``(wordsInfer, formatsByGroup)``.
        """
        _, wordsInfer = self.getGFormat(configParas, gVeparas)
        fcCode = next((word[0] for word in wordsInfer if word[1] == 0), None)
        grouped = self.clsByFunc(fcCode)
        formatsByGroup = {
            fcKey: self.getCFormat(configParas, gVeparas, groupMsgs)
            for fcKey, groupMsgs in grouped.items()
        }
        return wordsInfer, formatsByGroup

    def GTJsonTree(self, configParas, gVeparas):
        """Build the symbol tree and return ``transToIcsDictTree()`` of its root.

        The overall format is printed to stdout before the tree is built.
        """
        gFormat, cFormats = self.GTreeGenerate(configParas, gVeparas)
        print(gFormat)
        root = self.icsSymTree.icsSymToTree(gFormat, cFormats)
        return root.transToIcsDictTree()

    def changeFormat(self, boundaries, wordsType):
        """Split the messages by ``boundaries`` and name every word type.

        Returns:
            ``(wordTHeaders, gForMsg)``: the ``cVertNumToName`` result for
            each word type and the ``splitMsgByTypes`` result.
        """
        # Every message uses the same boundary list.
        perMessage = [boundaries] * len(self.messages)
        gForMsg = self.msgSplt.splitMsgByTypes(perMessage, self.messages)
        wordTHeaders = [
            self.wordTypeInfer.cVertNumToName(wordType[1])
            for wordType in wordsType
        ]
        return wordTHeaders, gForMsg

    def getGJson(self, congigParas, gVeparas):
        """Run ``getGFormat`` and pass its result through ``changeFormat``."""
        boundaries, wType = self.getGFormat(congigParas, gVeparas)
        return self.changeFormat(boundaries, wType)

    def getGF(self, uId=' '):
        """Run ``getGJson`` with a fixed user config and generated parameters.

        ``uId`` is accepted but not used by this method.
        """
        # future
        uConfig = UserConfig('/home/wxw/data/ToolDatas/15895903730.10.222', '15895903730')
        gVeParas = GveConf.geneGveParas()
        return self.getGJson(uConfig, gVeParas)

    def combineFormats(self):
        """Placeholder; does nothing."""

    def clsMessages(self):
        """Placeholder; does nothing."""
class MegSplitLogic:
    """Compute per-message borders from several vote kinds and split messages."""

    def __init__(self):
        """Create the helper objects used by the border and split methods."""
        super().__init__()
        self.converter = Converter()
        self.msgSpliter = MsgSpliter()
        self.redis_dealer = redis_deal()
        self.splt = splitter()
        self.desiner = Desiner()
        self.msAb = MeasureAb()
        self.cvt = Converter()
        self.dataTuning = DataTuning()

    @staticmethod
    def _voteParas(gveConfigParas):
        """Translate the configuration keys into the voting parameter dict."""
        return {
            'diff_measure': gveConfigParas['diffMeasure'],
            'vWay': gveConfigParas['vWayFre'],
            'T': gveConfigParas['T'],
            'r': gveConfigParas['r'],
        }

    def getOrderBorders(self, gveConfigParas, messages):
        """Vote on the order votes of ``messages``.

        Args:
            gveConfigParas: Mapping providing ``'diffMeasure'``,
                ``'vWayFre'``, ``'T'`` and ``'r'``.
            messages: Messages to analyse.

        Returns:
            The result of ``Desiner.VoteMultyByDicParas``.
        """
        borderDicts = self.splt.getOrderVotesByMsgs(messages)
        return self.desiner.VoteMultyByDicParas(
            self._voteParas(gveConfigParas), borderDicts)

    def getEntryBorders(self, gveConfigParas, messages):
        """Vote on the entry votes of ``messages``.

        Takes the same arguments and returns the same kind of result as
        ``getOrderBorders``.
        """
        # NOTE(review): the voting way is read from 'vWayFre' here as well;
        # confirm whether entry votes were meant to use a separate key.
        entryDicts = self.splt.getEntryVotesByMsgs(messages)
        return self.desiner.VoteMultyByDicParas(
            self._voteParas(gveConfigParas), entryDicts)

    def getMbourders(self, gveConfigParas, messages):
        """Vote on the ``getVeVotesByMsg`` votes of ``messages``.

        The first vote entry is printed to stdout before voting, so an
        empty vote list raises ``IndexError``.
        """
        VeDicts = self.splt.getVeVotesByMsg(messages)
        paraFre = self._voteParas(gveConfigParas)
        print(VeDicts[0])
        return self.desiner.VoteMultyByDicParas(paraFre, VeDicts)

    def getFreBorders(self, gveConfigParas, messages):
        """Vote on the frequency votes of ``messages``.

        Takes the same arguments and returns the same kind of result as
        ``getOrderBorders``.
        """
        freDicts = self.splt.getFreVotesByMsg(messages)
        return self.desiner.VoteMultyByDicParas(
            self._voteParas(gveConfigParas), freDicts)

    def msgSplit(self, borders, msgs, maxRange=15):
        """Split ``msgs`` at ``borders`` via ``self.msgSpliter.splitMessages``."""
        return self.msgSpliter.splitMessages(borders, msgs, maxRange)

    def getOrderBordersNyPath(self, filePath='', msgs=None, maxRange=15):
        """Compute order borders with fixed parameters and split ``msgs``.

        ``filePath`` is accepted but not used by this method.

        Returns:
            ``(borders, spltMsgs)``.
        """
        # future update: the voting parameters are hard-coded for now.
        veParas = {'diffMeasure': 'abs', 'vWayFre': 'loose', 'T': 0, 'r': 0.3}
        borders = self.getOrderBorders(veParas, msgs)
        spltMsgs = self.msgSplit(borders, msgs, maxRange)
        return borders, spltMsgs