Exemplo n.º 1
0
 def __init__(self, messages=None):
     """Store the messages and create the helper objects used by this instance.

     Args:
         messages: Messages to work on. Stored unchanged and also passed to
             the WholeFieldTypeInfer constructor. Defaults to None.
     """
     self.messages = messages
     # The type inferer is built from the same messages object kept above.
     self.wordTypeInfer = WholeFieldTypeInfer(self.messages)
     self.cvter = Converter()
     self.wcvter = word_convert()
     self.msgSplt = MsgSpliter()
     self.dataTuning = DataTuning()
     self.icsSymTree = IcsSymbolToTree()
Exemplo n.º 2
0
 def __init__(self):
     """Initialise the base class and create the helper objects used later."""
     super().__init__()
     self.converter = Converter()
     self.msgSpliter = MsgSpliter()
     self.redis_dealer = redis_deal()
     self.splt = splitter()
     self.desiner = Desiner()
     self.msAb = MeasureAb()
     # NOTE(review): a second, independent Converter instance; `converter`
     # above holds another one. Confirm whether both are needed.
     self.cvt = Converter()
     self.dataTuning = DataTuning()
Exemplo n.º 3
0
class TextParseLogic:
    """Delimiter-based splitting helpers for text messages."""

    def __init__(self):
        """Set the logic name and create the MsgSpliter used for splitting."""
        self.name = 'parser'
        self.msgSpliter = MsgSpliter()

    def split(self, messages, delimiter):
        """Return one list of fields per message, cut at ``delimiter``."""
        return [message.split(delimiter) for message in messages]

    def ConvertDataToMessage(self, messages, delimeter, h=0):
        """Wrap every message in a TextModel.

        Each TextModel is built from the raw message, its delimiter-split
        fields, the message's position in ``messages`` and ``h``.

        Returns:
            The list of TextModel objects, in message order.
        """
        fieldLists = self.split(messages, delimeter)
        return [
            TextModel(rawMsg, fields, position, h)
            for position, (rawMsg, fields) in enumerate(
                zip(messages, fieldLists))
        ]

    def spltMsgs(self, messages, delimiter):
        """Split the messages and build a fixed six-entry header row.

        Returns:
            ``(headers, rows)``. ``rows`` holds the stringified fields of each
            message. ``headers`` has ``str(delimiter)`` at the even positions
            and ``'field1'``, ``'field3'``, ``'field5'`` at the odd ones.
        """
        rows = [
            list(map(str, message.split(delimiter))) for message in messages
        ]
        headers = [
            'field' + str(pos) if pos % 2 else str(delimiter)
            for pos in range(6)
        ]
        return headers, rows

    def spltMsgsSimple(self, messages, delimiter, maxRange=150):
        """Split the messages and hand the fields to ``MsgSpliter.splitTextMsgs``.

        Returns:
            Whatever ``splitTextMsgs`` returns for the split fields, the
            delimiter and ``maxRange``.
        """
        fieldLists = [message.split(delimiter) for message in messages]
        return self.msgSpliter.splitTextMsgs(fieldLists, delimiter, maxRange)
Exemplo n.º 4
0
 def __init__(self):
     """Set the logic name and create the MsgSpliter helper."""
     self.name = 'parser'
     self.msgSpliter = MsgSpliter()
Exemplo n.º 5
0
 def __init__(self):
     """Initialise the base class and create the converter, splitter and redis helpers."""
     super().__init__()
     self.converter = Converter()
     self.msgSpliter = MsgSpliter()
     self.redis_dealer = redis_deal()
Exemplo n.º 6
0
class GvoterLogic(splitter):
    """Derive one shared list of split boundaries for a set of messages.

    Frequency votes and entry votes are collected, merged, turned into
    boundaries by a ``Desiner`` and finally restricted to the range common to
    all messages. The result is stored in redis under a per-user key.
    """

    def __init__(self):
        """Initialise the base splitter and create the helper objects."""
        super().__init__()
        self.converter = Converter()
        self.msgSpliter = MsgSpliter()
        self.redis_dealer = redis_deal()

    def getGVotes(self, configParas, messages):
        """Collect the frequency and entry votes and merge each set.

        ``getFreVotes`` / ``getEntryVotes`` are not defined in this class;
        presumably they come from the ``splitter`` base class.

        Returns:
            ``(freGVotes, entryGVotes)``, each the result of
            ``Converter.MergeListDics`` on the corresponding votes.
        """
        freVotes = self.getFreVotes(configParas, messages)
        entryVotes = self.getEntryVotes(configParas, messages)
        freGVotes = self.converter.MergeListDics(freVotes)
        entryGVotes = self.converter.MergeListDics(entryVotes)
        return freGVotes, entryGVotes

    def getBoundaries(self, configParas, gveConfigParas, messages):
        """Vote boundaries from the frequency and the entry votes and merge them.

        Args:
            configParas: User configuration forwarded to the vote collectors.
            gveConfigParas: Mapping providing ``diffMeasure``, ``vWayFre``,
                ``vWayEntry``, ``T`` and ``r``.
            messages: Messages to analyse.

        Returns:
            The result of ``Converter.MergeLists`` on both boundary lists.

        Raises:
            KeyError: If ``gveConfigParas`` lacks one of the keys above.
        """
        freGVotes, entryGVotes = self.getGVotes(configParas, messages)
        desiner = Desiner()
        freParas = {
            'diff_measure': gveConfigParas['diffMeasure'],
            'vWay': gveConfigParas['vWayFre'],
            'T': gveConfigParas['T'],
            'r': gveConfigParas['r'],
        }
        freBoundaries = desiner.VoteSingleByDicParas(freParas, freGVotes)
        # The entry vote uses the same parameters except for the voting way;
        # a separate dict keeps the first call's parameters untouched.
        entryParas = dict(freParas, vWay=gveConfigParas['vWayEntry'])
        entryBoundaries = desiner.VoteSingleByDicParas(entryParas, entryGVotes)
        return self.converter.MergeLists(freBoundaries, entryBoundaries)

    def getCommonRange(self, messages):
        """Return ``min(23, shortest message length + 2)``.

        Raises:
            ValueError: If ``messages`` is empty.
        """
        shortest = min(len(message) for message in messages)
        return min(23, shortest + 2)

    def filterBoundaries(self, boundaries, cRange):
        """Return the leading boundaries that are below ``cRange``.

        Scanning stops at the first boundary that is not below ``cRange``;
        anything after it is dropped, so the input is presumably ascending.
        """
        rBoundaries = []
        for boundary in boundaries:
            if boundary >= cRange:
                break
            rBoundaries.append(boundary)
        return rBoundaries

    def getGBoundaries(self, boundaries, messages):
        """Restrict ``boundaries`` to the range common to all ``messages``."""
        cRange = self.getCommonRange(messages)
        return self.filterBoundaries(boundaries, cRange)

    def getSplitMessages(self,
                         configParas,
                         gveConfigParas,
                         messages,
                         FType='G'):
        """Return the shared boundaries, reading or refreshing the redis entry.

        With ``FType == 'Y'`` and an existing redis entry, the stored JSON is
        decoded and returned. In every other case the boundaries are
        recomputed and written back to redis as JSON.

        NOTE(review): the default ``'G'`` never reads the cache; confirm that
        is intended.

        Args:
            configParas: User configuration; ``getUserPathDynamic()`` provides
                the prefix of the redis key.
            gveConfigParas: Vote parameters, see ``getBoundaries``.
            messages: Messages to analyse.
            FType: Mode flag; only ``'Y'`` enables the cache lookup.

        Returns:
            The list of boundaries.
        """
        splitKey = '{}_{}'.format(configParas.getUserPathDynamic(), 'GSplit')
        if FType == 'Y' and self.redis_dealer.is_exist_key(splitKey):
            return json.loads(self.redis_dealer.read_from_redis(splitKey))
        boundaries = self.getBoundaries(configParas, gveConfigParas, messages)
        gBoundaries = self.getGBoundaries(boundaries, messages)
        self.redis_dealer.insert_to_redis(splitKey, json.dumps(gBoundaries))
        return gBoundaries

    def splitMessages(self,
                      configParas,
                      gveConfigParas,
                      messages,
                      maxRange=15):
        """Split every message at the shared boundaries.

        Returns:
            The result of ``MsgSpliter.splitMessages`` when each message is
            given the same boundary list.
        """
        gBoundaries = self.getSplitMessages(configParas, gveConfigParas,
                                            messages)
        # Every message gets the same boundary list object.
        return self.msgSpliter.splitMessages([gBoundaries] * len(messages),
                                             messages, maxRange)

    def splitFileMessages(self, filePath, messages, maxRange=15):
        """Split ``messages`` using the generated vote parameters.

        NOTE(review): ``filePath`` is not used and the user configuration is
        hard-coded below; confirm before relying on either.
        """
        gVeParas = GveConf.geneGveParas()
        uConfig = UserConfig('/home/wxw/data/ToolDatas/15895903730.10.222',
                             '15895903730')
        return self.splitMessages(uConfig, gVeParas, messages, maxRange)
Exemplo n.º 7
0
class FormatGeneLogic:
    """Infer the general and per-function field formats of a message set."""

    def __init__(self, messages=None):
        """Store ``messages`` and create the helper objects used by the methods."""
        self.messages = messages
        self.wordTypeInfer = WholeFieldTypeInfer(self.messages)
        self.cvter = Converter()
        self.wcvter = word_convert()
        self.msgSplt = MsgSpliter()
        self.dataTuning = DataTuning()
        self.icsSymTree = IcsSymbolToTree()

    def getRanges(self, messages):
        """Return ``min(23, L + 2)``, L being the shortest length capped at 65536."""
        shortest = min([65536] + [len(message) for message in messages])
        return min(23, shortest + 2)

    def getMesFormat(self):
        """Placeholder; does nothing."""

    def sortWordsType(self, words):
        """Return the items of ``words`` ordered by the first element of each key."""
        pairs = list(words.items())
        pairs.sort(key=lambda pair: pair[0][0])
        return pairs

    def getGFormat(self, congigParas, gVeparas):
        """Compute the general boundaries and word types of ``self.messages``.

        Returns:
            ``(boundaries, wordsType)``: the re-adjusted words converted back
            to borders, and the sorted word/type pairs.
        """
        voter = GvoterLogic()
        rawBorders = voter.getSplitMessages(congigParas,
                                            gVeparas,
                                            self.messages,
                                            FType='G')
        items = self.cvter.border2item(rawBorders)
        fRange = self.getRanges(self.messages)
        adjuster = ReAjustLogic(items, self.messages)
        adjuster.reSplit()
        adjuster.reCluster()
        words = adjuster.words
        inferred = self.wordTypeInfer.extractWords(words, fRange)
        sortedTypes = self.sortWordsType(inferred)
        return self.wcvter.itemtoborder(words), sortedTypes

    def getCFormat(self, configParas, gVeparas, msgs):
        """Compute the sorted word types of one group of messages.

        Groups with fewer than 10 messages are not analysed; the fixed value
        ``[((0, -1), 7)]`` is returned for them.
        """
        if len(msgs) < 10:
            return [((0, -1), 7)]
        voter = GvoterLogic()
        rawBorders = voter.getSplitMessages(configParas,
                                            gVeparas,
                                            msgs,
                                            FType='C')
        items = self.cvter.border2item(rawBorders)
        fRange = self.getRanges(msgs)
        items = self.cvter.filterB(items, fRange)
        adjuster = ReAjustLogic(items, msgs)
        adjuster.reSplit()
        adjuster.reCluster()
        # A dedicated inferer is built for this group of messages.
        groupInfer = WholeFieldTypeInfer(msgs)
        return self.sortWordsType(groupInfer.extractCWords(adjuster.words))

    def clsByFunc(self, los):
        """Group ``self.messages`` by the slice ``msg[los[0]:los[1]]``.

        Returns:
            A dict mapping each slice value to the list of message remainders
            ``msg[los[1]:]``, in message order.
        """
        groups = {}
        for msg in self.messages:
            groups.setdefault(msg[los[0]:los[1]], []).append(msg[los[1]:])
        return groups

    def GTreeGenerate(self, configParas, gVeparas):
        """Infer the general format, then a format for every function group.

        The first inferred word whose type is 0 provides the slice used to
        group the messages; when there is none, ``None`` is passed on to
        ``clsByFunc``.

        Returns:
            ``(wordsInfer, formats)`` where ``formats`` maps every group key
            to the result of ``getCFormat`` for that group.
        """
        _, wordsInfer = self.getGFormat(configParas, gVeparas)
        fcCode = next((word[0] for word in wordsInfer if word[1] == 0), None)
        grouped = self.clsByFunc(fcCode)
        formats = {
            fcKey: self.getCFormat(configParas, gVeparas, group)
            for fcKey, group in grouped.items()
        }
        return wordsInfer, formats

    def GTJsonTree(self, configParas, gVeparas):
        """Build the symbol tree of the inferred formats and return its dict form.

        The general format is also printed to stdout.
        """
        gFormat, cFormats = self.GTreeGenerate(configParas, gVeparas)
        print(gFormat)
        return self.icsSymTree.icsSymToTree(gFormat,
                                            cFormats).transToIcsDictTree()

    def changeFormat(self, boundaries, wordsType):
        """Split the messages by ``boundaries`` and name every word type.

        Returns:
            ``(wordTHeaders, gForMsg)``: the type names from
            ``cVertNumToName`` and the result of ``splitMsgByTypes``.
        """
        # Every message is split with the same boundary object.
        perMessage = [boundaries] * len(self.messages)
        gForMsg = self.msgSplt.splitMsgByTypes(perMessage, self.messages)
        wordTHeaders = [
            self.wordTypeInfer.cVertNumToName(wordType[1])
            for wordType in wordsType
        ]
        return wordTHeaders, gForMsg

    def getGJson(self, congigParas, gVeparas):
        """Return ``changeFormat`` applied to the result of ``getGFormat``."""
        boundaries, wType = self.getGFormat(congigParas, gVeparas)
        return self.changeFormat(boundaries, wType)

    def getGF(self, uId=' '):
        """Run ``getGJson`` with the generated vote parameters.

        ``uId`` is currently unused; the user configuration is fixed below.
        """
        uConfig = UserConfig('/home/wxw/data/ToolDatas/15895903730.10.222',
                             '15895903730')
        gVeParas = GveConf.geneGveParas()
        return self.getGJson(uConfig, gVeParas)

    def combineFormats(self):
        """Placeholder; does nothing."""

    def clsMessages(self):
        """Placeholder; does nothing."""
Exemplo n.º 8
0
class MegSplitLogic:
    """Compute per-message borders from different vote kinds and split by them."""

    def __init__(self):
        """Create the helper objects used by the border and split methods."""
        super().__init__()
        self.converter = Converter()
        self.msgSpliter = MsgSpliter()
        self.redis_dealer = redis_deal()
        self.splt = splitter()
        self.desiner = Desiner()
        self.msAb = MeasureAb()
        self.cvt = Converter()
        self.dataTuning = DataTuning()

    @staticmethod
    def _voteParas(gveConfigParas):
        """Translate the vote configuration into the keys the Desiner reads."""
        return {
            'diff_measure': gveConfigParas['diffMeasure'],
            'vWay': gveConfigParas['vWayFre'],
            'T': gveConfigParas['T'],
            'r': gveConfigParas['r'],
        }

    def _voteBorders(self, gveConfigParas, votes):
        """Run ``Desiner.VoteMultyByDicParas`` on ``votes`` with the translated parameters."""
        return self.desiner.VoteMultyByDicParas(
            self._voteParas(gveConfigParas), votes)

    def getOrderBorders(self, gveConfigParas, messages):
        """Return the borders voted from ``splitter.getOrderVotesByMsgs``.

        Args:
            gveConfigParas: Mapping with ``diffMeasure``, ``vWayFre``, ``T``
                and ``r``.
            messages: Messages to analyse.

        Raises:
            KeyError: If ``gveConfigParas`` lacks one of the keys above.
        """
        borderDicts = self.splt.getOrderVotesByMsgs(messages)
        return self._voteBorders(gveConfigParas, borderDicts)

    def getEntryBorders(self, gveConfigParas, messages):
        """Return the borders voted from ``splitter.getEntryVotesByMsgs``.

        NOTE(review): the voting way is read from ``vWayFre`` here as well;
        confirm that no entry-specific setting is expected.
        """
        entryDicts = self.splt.getEntryVotesByMsgs(messages)
        return self._voteBorders(gveConfigParas, entryDicts)

    def getMbourders(self, gveConfigParas, messages):
        """Return the borders voted from ``splitter.getVeVotesByMsg``."""
        VeDicts = self.splt.getVeVotesByMsg(messages)
        return self._voteBorders(gveConfigParas, VeDicts)

    def getFreBorders(self, gveConfigParas, messages):
        """Return the borders voted from ``splitter.getFreVotesByMsg``."""
        freDicts = self.splt.getFreVotesByMsg(messages)
        return self._voteBorders(gveConfigParas, freDicts)

    def msgSplit(self, borders, msgs, maxRange=15):
        """Split ``msgs`` at ``borders`` through ``MsgSpliter.splitMessages``."""
        return self.msgSpliter.splitMessages(borders, msgs, maxRange)

    def getOrderBordersNyPath(self, filePath='', msgs=None, maxRange=15):
        """Compute order borders for ``msgs`` and split the messages by them.

        Args:
            filePath: Currently unused.
            msgs: Messages to analyse; passed on unchanged, including the
                default ``None``.
            maxRange: Forwarded to ``msgSplit``.

        Returns:
            ``(borders, spltMsgs)``.
        """
        # TODO: make these vote parameters configurable.
        veParas = {'diffMeasure': 'abs', 'vWayFre': 'loose', 'T': 0, 'r': 0.3}
        # TODO: when filePath is given, load msgs from it via
        # self.dataTuning.readDatas(filePath).
        borders = self.getOrderBorders(veParas, msgs)
        spltMsgs = self.msgSplit(borders, msgs, maxRange)
        return borders, spltMsgs