Exemplo n.º 1
0
class TextClassify:
    """Cluster raw messages by frequent-word features.

    Pipeline: count frequent words -> convert counts to frequencies ->
    build per-message feature vectors -> cluster (K-means or DBSCAN).
    """

    def __init__(self, messages):
        self.messages = messages        # raw input messages
        self.cVerTer = Converter()      # raw <-> text conversion helper
        self.freWords = {}              # word -> count (later: frequency)
        self.mger = WordsMerger()
        self.analyzer = base_analyzer()
        self.features = None            # cached feature matrix

    def merGeWords(self, freWords):
        """Split each word on '_' and merge each word list via WordsMerger.

        :param freWords: list of word lists
        :return: list of merged word lists
        """
        comBineFreWords = []
        for words in freWords:
            words = [word.split('_') for word in words]
            comBineFreWords.append(self.mger.mergeWords(words))
        return comBineFreWords

    def cntWord(self, wSize, TK, wLen):
        """Count occurrences of newly seen frequent words across all messages.

        Updates and returns self.freWords (word -> occurrence count).
        """
        freWords, _ = get_messages(self.messages, wSize, TK, wLen)
        # Only collect words not already counted, to avoid recounting.
        wSetv = set()
        for freWordList in freWords:
            for word in freWordList:
                if word not in self.freWords:
                    wSetv.add(word)
        textList = self.cVerTer.ConvertRawToLengthTexts(self.messages, '_')
        for word in wSetv:
            self.freWords[word] = textList.count(word)
        return self.freWords

    def cntPro(self, words=None):
        """Convert word counts to frequencies (self.freWords when words is None)."""
        # Fixed `== None` -> `is None` (identity comparison for None).
        if words is None:
            self.freWords = self.analyzer.convert_num_to_frequent(
                self.freWords)
        else:
            self.freWords = self.analyzer.convert_num_to_frequent(words)
        return self.freWords

    def FeGenerator(self):
        """Build and cache per-message feature vectors from frequent words."""
        # Simplified a no-op comprehension over .items() to list().
        tulWords = list(self.freWords.items())
        featureGene = WordsFeatureGene(tulWords)
        textMsgs = [
            self.cVerTer.ConvertRawToLengthText(message, delimeter='_')
            for message in self.messages
        ]
        mFeatures = featureGene.cvtMsgs(textMsgs)
        self.features = mFeatures
        return mFeatures

    def clsMessages(self, wSize, TK, wLen, Kcls):
        """Cluster messages into Kcls groups with K-means."""
        self.cntWord(wSize, TK, wLen)
        self.cntPro()
        wFeatures = self.FeGenerator()
        clser = KmeansClasser()
        return clser.clsMessages(self.messages, wFeatures, Kcls)

    def clsByDbscan(self, wSize, TK, wLen, mindis, minpt):
        """Cluster messages with DBSCAN (distance=mindis, min points=minpt)."""
        self.cntWord(wSize, TK, wLen)
        self.cntPro()
        wFeatures = self.FeGenerator()
        clser = DbScanClasser()
        return clser.clsMessages(self.messages, wFeatures, mindis, minpt)
Exemplo n.º 2
0
 def __init__(self, messages):
     """Store the messages and set up conversion/merge/analysis helpers."""
     self.features = None
     self.freWords = {}
     self.messages = messages
     self.cVerTer = Converter()
     self.mger = WordsMerger()
     self.analyzer = base_analyzer()
Exemplo n.º 3
0
 def __init__(self, messages=None):
     """Record the messages, a Converter, and the inference thresholds."""
     # Thresholds consumed by the various infer* checks.
     self.MaxLen = 40
     self.lengthThreshold = 0.8
     self.constThreshold = 0.98
     self.idThreshold = 0.7
     self.cverter = Converter()
     self.messages = messages
Exemplo n.º 4
0
 def __init__(self, messages=None):
     """Store the messages and build every collaborator this object uses."""
     self.messages = messages
     # Type inference is seeded with the full message set up front.
     self.wordTypeInfer = WholeFieldTypeInfer(self.messages)
     self.msgSplt = MsgSpliter()
     self.cvter = Converter()
     self.wcvter = word_convert()
     self.dataTuning = DataTuning()
     self.icsSymTree = IcsSymbolToTree()
Exemplo n.º 5
0
def raw_to_log(file_path, r_way, protocol):
    """Read raw messages from file_path and log each one as '<index>:<text>'.

    :param file_path: input file containing raw messages
    :param r_way: read mode passed through to read_datas
    :param protocol: protocol name, used as the log file name suffix
    """
    datas = read_datas(file_path, r_way)
    datas = get_puredatas(datas)
    converter = Converter()
    logger_raw = get_logger(log_path + '/' + protocol, 'raw_message_logger')
    # Bug fix: the original set i = 0 and never incremented it, so every
    # message was logged as "0:...". enumerate yields the real indices.
    # (Also dropped the unused raw_datas list.)
    for i, data in enumerate(datas):
        logger_raw.error(str(i) + ':' + converter.convert_raw_to_text(data))
Exemplo n.º 6
0
 def inferLen(self, datas, lenDatas):
     """Return 1 if datas correlates with lenDatas like a length field, else 0.

     Interprets the bytes as both big- and little-endian integers and
     checks the Pearson correlation against self.lengthThreshold.
     """
     as_big = Converter.bytesToBigInt(datas)
     as_little = Converter.bytesToLittleInt(datas)
     corr_big = base_analyzer.pearson(as_big, lenDatas)
     corr_little = base_analyzer.pearson(as_little, lenDatas)
     threshold = self.lengthThreshold
     return 1 if corr_big > threshold or corr_little > threshold else 0
Exemplo n.º 7
0
def raw_to_redis(file_path, r_way):
    """Read raw messages from file_path, convert each to text, and store
    the resulting list in redis under the file path as key."""
    datas = get_puredatas(read_datas(file_path, r_way))
    converter = Converter()
    raw_datas = [converter.convert_raw_to_text(data) for data in datas]
    phrase_redis = redis_deal()
    phrase_redis.insert_to_redis(file_path, raw_datas)
Exemplo n.º 8
0
 def __init__(self):
     """Initialize the parent class and every collaborator helper."""
     super().__init__()
     # Two separate Converter instances are kept to preserve the
     # original attribute set (self.converter and self.cvt).
     self.converter = Converter()
     self.cvt = Converter()
     self.msgSpliter = MsgSpliter()
     self.splt = splitter()
     self.redis_dealer = redis_deal()
     self.desiner = Desiner()
     self.msAb = MeasureAb()
     self.dataTuning = DataTuning()
Exemplo n.º 9
0
 def __init__(self):
     """Build the protocol tuners, parsers, and measurement helpers."""
     # Split logic and measurement tooling.
     self.msgLogic = MegSplitLogic()
     self.mtool = MessageSplitMeasure()
     self.anlzer = base_analyzer()
     # Protocol-specific tuners/parsers.
     self.modbus = ModBusDataTuning()
     self.md = modbus()
     self.ftp = FTPDataTuning()
     self.ftpPaser = FTPParser()
     self.cmPaser = ComPaser()
     # Shared infrastructure.
     self.cvt = Converter()
     self.rds = redis_deal()
Exemplo n.º 10
0
def getDelimiter(datas):
    """Infer a delimiter string from the top-ranked n-gram words of datas.

    The top 10 words are filtered, then each space-separated byte value is
    turned back into its character and joined into one delimiter string.
    """
    convert = Converter()
    messages = [convert.convert_raw_to_text(data) for data in datas]
    t_results = []
    for message in messages:
        t_results.extend(get_ngram_words([message], (1, 2), 10))
    top_words = analyzer.get_topk(t_results)[:10]
    delim_words = filterWords(top_words)
    return ''.join(chr(int(tok)) for tok in delim_words.split(' '))
Exemplo n.º 11
0
 def inferLen(self, Los=None, datas=None):
     """Return 1 if datas (or the bytes at Los) behaves like a length field, else 0.

     :param Los: optional byte locations; when given, datas is extracted
                 from self.messages at those locations
     :param datas: byte sequences to test when Los is not supplied
     """
     # Fixed `!= None` -> `is not None` (identity comparison for None).
     if Los is not None:
         datas = self.cverter.getDatasByLocs(self.messages, Los)
     lens = [len(data) for data in datas]
     datasLenBig = Converter.bytesToBigInt(datas)
     datasLittle = Converter.bytesToLittleInt(datas)
     personBig = base_analyzer.pearson(datasLenBig, lens)
     personLittle = base_analyzer.pearson(datasLittle, lens)
     if personBig > self.lengthThreshold or personLittle > self.lengthThreshold:
         return 1
     return 0
 def inferSeriesId(self, datas):
     """Return 1 if datas increases like a sequence/transaction id, else 0.

     Correlates the values (both endiannesses) with 0, 1, 2, ... and
     compares the best Pearson score against self.idThreshold.
     """
     ids = list(range(len(datas)))  # expected id sequence: 0, 1, 2, ...
     datasBigInt = Converter.bytesToBigInt(datas)
     # Consistency fix: every sibling implementation calls
     # Converter.bytesToLittleInt; `byteToLittle` matched nothing else here.
     datasLittle = Converter.bytesToLittleInt(datas)
     tRate = max(base_analyzer.pearson(ids, datasBigInt),
                 base_analyzer.pearson(ids, datasLittle))
     if tRate > self.idThreshold:
         return 1
     return 0
Exemplo n.º 13
0
 def inferSeriesId(self, Los=None, datas=None):
     """Return 1 if datas (or the bytes at Los) looks like a sequence id, else 0.

     :param Los: optional byte locations; when given, datas is extracted
                 from self.messages at those locations
     :param datas: byte sequences to test when Los is not supplied
     """
     # Fixed `!= None` -> `is not None`; built ids with range() instead of
     # a manual enumerate loop.
     if Los is not None:
         datas = self.cverter.getDatasByLocs(self.messages, Los)
     ids = list(range(len(datas)))  # expected id sequence: 0, 1, 2, ...
     datasBigInt = Converter.bytesToBigInt(datas)
     datasLittle = Converter.bytesToLittleInt(datas)
     tRate = max(base_analyzer.pearson(ids, datasBigInt),
                 base_analyzer.pearson(ids, datasLittle))
     if tRate > self.idThreshold:
         return 1
     return 0
Exemplo n.º 14
0
 def split_by_frequent(self, messages):
     """Split messages at boundaries voted by frequent-word statistics.

     Frequent words are loaded from redis when cached; otherwise they are
     derived from the raw words and written back to redis. The per-message
     votes are then merged by the Desiner and returned as a boundary order.
     """
     # Cleanup: removed the unused `entry_words = None` local.
     prefix = ve_strategy().GetWordsKeys('FrequentWords')
     if self.redis_read.is_exist_key(prefix):
         frequent_words = self.redis_read.read_from_redis(prefix)
     else:
         raw_keys = ve_strategy().GetWordsKeys('RawWords')
         raw_words = self.redis_read.read_from_redis(raw_keys)
         frequent_words = Converter().ConvertRawToNormalFrequent(raw_words, self.parameters['height'] + 1)
         self.redis_read.insert_to_redis(prefix, frequent_words)
     frequent_voter = frequence_voter(frequent_words)
     PrimBorders = frequent_voter.vote_for_messages(messages, self.parameters['height'])
     FinalBorders = Desiner().VoteMultiM(PrimBorders, self.parameters['diff_measure'],
                                         self.parameters['decision_type'],
                                         self.parameters['Threshold_T'], self.parameters['Threshod_R'])
     return Converter().ConvertListToOrder(FinalBorders)
Exemplo n.º 15
0
 def inferConst(self, datas):
     """Return 1 if a single value dominates datas above constThreshold, else 0."""
     counts = Converter.convert_raw_to_count(datas)
     # Ascending sort by count: the last entry is the most frequent value.
     ranked = sorted(counts.items(), key=lambda kv: kv[1])
     dominant_ratio = ranked[-1][1] / len(datas)
     return 1 if dominant_ratio > self.constThreshold else 0
Exemplo n.º 16
0
 def find_constone(self, datas):
     """Return 1 if the least frequent value's count exceeds constThreshold, else 0.

     :param datas: raw byte values to histogram
     """
     wordDic = Converter.convert_raw_to_count(datas)
     # Ascending sort by count, so wordDic[0] is the LEAST frequent value.
     wordDic = sorted(wordDic.items(), key=lambda x: x[1])
     # NOTE(review): this compares a raw count against constThreshold,
     # while the sibling inferConst compares a ratio (count / len(datas))
     # of the MOST frequent entry ([-1]). If constThreshold is a fraction
     # (0.98 elsewhere), this is True whenever any count >= 1 — possibly a
     # bug; confirm intent against callers before changing.
     if (wordDic[0][1] > self.constThreshold):
         return 1
     else:
         return 0
Exemplo n.º 17
0
 def upDataByType(self):
     """Append this node's data to self.value according to its word type.

     'C' (constant): record only the first observed value.
     'F': record each distinct observed value.
     """
     datas = self.getNodeDataV()
     if self.word_type == 'C':
         self.value.append(datas[0])
     elif self.word_type == 'F':
         distinct = Converter().convert_raw_to_count(datas)
         self.value.extend(distinct)  # iterating the count dict yields its keys
     # any other word type: nothing recorded
Exemplo n.º 18
0
 def find_constfunc(self, datas):
     """Compute the function-code feature of a byte column.

     :param datas: list of bytes
     :return: (entropy of the count distribution, number of distinct values)
     """
     counts = Converter.convert_raw_to_count(datas)
     entropy = base_analyzer.get_entry(list(counts.values()))
     return entropy, len(counts)
Exemplo n.º 19
0
 def getFuncScore(self, Los=None, datas=None):
     """Return (entropy, distinct-count) of the value distribution of datas.

     :param Los: optional byte locations; when given, datas is extracted
                 from self.messages at those locations
     :param datas: byte sequences to score when Los is not supplied
     """
     # Fixed `!= None` -> `is not None`; manual accumulation -> sum().
     if Los is not None:
         datas = self.cverter.getDatasByLocs(self.messages, Los)
     datasDic = Converter.convert_raw_to_count(datas)
     sumValue = sum(datasDic.values())
     # Normalize counts to probabilities before taking the entropy.
     probs = [count / sumValue for count in datasDic.values()]
     datasEntry = base_analyzer.get_entry(probs)
     return datasEntry, len(datasDic)
Exemplo n.º 20
0
 def split_by_words_type(self, datas, T_max_range):
     """Classify each byte position as const/variable and merge into fields.

     :param datas: raw messages
     :param T_max_range: number of leading byte positions to examine
     :return: (merged fields, candidate boundary offsets)
     """
     infer = word_infer()
     merger = base_merger()
     convert = Converter()
     analyzer_ = base_analyzer()
     fields_set = []
     for position in range(T_max_range):
         column = get_data_bylo(datas, position)
         counts = convert.convert_raw_to_count(column)
         frequents = analyzer_.convert_num_to_frequent(counts)
         # Type code 0 = constant field, 4 = variable field.
         type_code = 0 if infer.is_const_word(frequents, 0.95) else 4
         fields_set.append(loc_field((position, position), type_code))
     words_f = merger.merge_words(fields_set)
     candidate_borders = [w.loc[0] for w in words_f]
     return words_f, candidate_borders
Exemplo n.º 21
0
 def inferFunc(self, datas):
     """Return 1 if datas has few enough distinct values to be a function code, else 0.

     :param datas: list of byte values
     """
     datasDic = Converter.convert_raw_to_count(datas)
     # Cleanup: the original also computed the distribution's entropy here,
     # but the result was never used — only the distinct-value count
     # decides the outcome (assumes get_entry is side-effect free).
     if len(datasDic) < self.FuncT:
         return 1
     return 0
Exemplo n.º 22
0
class FieldHunter:
    """Find delimiter candidates in raw messages via n-gram ranking."""

    def __init__(self):
        self.analyer = base_analyzer()
        self.convert = Converter()
        self.ranker = ranker()

    def itemJudge(self, item):
        """Return True if item (a decimal byte-value string) is an ASCII
        digit (48-57) or letter (65-90, 97-122)."""
        code = int(item)
        return (48 <= code <= 57) or (65 <= code <= 90) or (97 <= code <= 122)

    def isNumOrAlpha(self, sequence):
        """Return True if any space-separated byte value in sequence is
        alphanumeric per itemJudge."""
        return any(self.itemJudge(item) for item in sequence.split(' '))

    def findDelimiter(self, messages):
        """Return up to 100 ranked n-gram words containing no alphanumeric bytes.

        Such words are delimiter candidates: frequent byte sequences made
        entirely of punctuation/control characters.

        :param messages: raw messages to analyze
        :return: list of up to 100 candidate words, best ranked first
        """
        texts = [self.convert.convert_raw_to_text(data) for data in messages]
        wordsNgram = self.convert.ConvertRawToSimDic(texts, (1, 2))
        wordsNgram = self.ranker.rank_dic(wordsNgram, reverse=True)
        # Cleanup: the original first scanned for a single `delimiter` that
        # was never used, then scanned again for the candidate list, and
        # printed leftover debug output ('cccd'). Only the candidate list
        # was ever consumed, so the dead loop and prints are removed.
        candidates = [word[0] for word in wordsNgram
                      if not self.isNumOrAlpha(word[0])]
        return candidates[:100]
Exemplo n.º 23
0
 def inferConst(self, Los=None, datas=None):
     """Return 1 if one value dominates datas above constThreshold, else 0.

     :param Los: optional byte locations; when given, datas is extracted
                 from self.messages at those locations
     :param datas: byte sequences to test when Los is not supplied
     """
     # Fixed `!= None` -> `is not None`; dropped commented-out debug prints.
     if Los is not None:
         datas = self.cverter.getDatasByLocs(self.messages, Los)
     wordDic = Converter.convert_raw_to_count(datas)
     # Ascending sort by count: the last entry is the most frequent value.
     wordDic = sorted(wordDic.items(), key=lambda x: x[1])
     if wordDic[-1][1] / len(datas) > self.constThreshold:
         return 1
     return 0
Exemplo n.º 24
0
 def getBoundaries(self, configParas, gveConfigParas, messages):
     """Merge the boundaries voted by frequency and by entropy statistics.

     Builds one shared vote-parameter dict, runs the Desiner vote once per
     vote way (frequency first, then entropy), and merges both results.
     """
     freGVotes, entryGVotes = self.getGVotes(configParas, messages)
     desiner = Desiner()
     voteParas = {
         'diff_measure': gveConfigParas['diffMeasure'],
         'vWay': gveConfigParas['vWayFre'],
         'T': gveConfigParas['T'],
         'r': gveConfigParas['r'],
     }
     freBoundaries = desiner.VoteSingleByDicParas(voteParas, freGVotes)
     voteParas['vWay'] = gveConfigParas['vWayEntry']
     entryBoundaries = desiner.VoteSingleByDicParas(voteParas, entryGVotes)
     return Converter().MergeLists(freBoundaries, entryBoundaries)
Exemplo n.º 25
0
 def inferLenAccau(self, Los=None, datas=None):
     """Return 1 if datas accurately encodes the remaining message length, else 0.

     Interprets the field bytes as big- and little-endian integers and
     counts, per endianness, how many match the length of the message past
     Los[-1] within +/-1; returns 1 when either accuracy exceeds
     self.lengthThreshold.

     :param Los: byte locations of the candidate length field; the offset
                 Los[-1] is where the remaining length is measured from
     :param datas: candidate field bytes when Los does not supply them
     """
     # Fixed `!= None` -> `is not None`.
     if Los is not None:
         datas = self.cverter.getDatasByLocs(self.messages, Los)
     # Remaining length after the field; -1 marks messages too short.
     # NOTE(review): Los is dereferenced below even when it is None, so
     # callers apparently always pass Los — confirm before datas-only use.
     lens = []
     for msg in self.messages:
         if len(msg) > Los[-1]:
             lens.append(len(msg) - Los[-1])
         else:
             lens.append(-1)
     datasLenBig = Converter.bytesToBigInt(datas)
     datasLittle = Converter.bytesToLittleInt(datas)
     # Count values within 1 of the true remaining length.
     acc_big = sum(1 for value, ln in zip(datasLenBig, lens)
                   if abs(value - ln) <= 1)
     acc_small = sum(1 for value, ln in zip(datasLittle, lens)
                     if abs(value - ln) <= 1)
     if (acc_small / len(datas) > self.lengthThreshold
             or acc_big / len(datas) > self.lengthThreshold):
         return 1
     return 0
Exemplo n.º 26
0
 def ConvertRawWordsToOrder(self, rawwords, nrange, ordertype="abs"):
     """Convert raw words into per-length rank-order mappings.

     Length-4 words define the prime ordering; words of lengths
     1..nrange-2 are then mapped onto it, either by absolute order
     ('abs') or by relative word order.

     :param rawwords: raw word statistics to order
     :param nrange: exclusive upper bound on word lengths considered
     :param ordertype: 'abs' for absolute ordering, anything else relative
     :return: raw-form order words from convert_order_to_raw
     """
     # Cleanup: removed an unused base_analyzer() instance and an unused
     # start_time; renamed the local that shadowed the Converter class.
     WordRanker = ranker()
     wordConverter = word_convert()
     num_words = wordConverter.splitwords_bylen(rawwords, nrange)
     for len_word in num_words:
         num_words[len_word] = WordRanker.rank_tulple(num_words[len_word],
                                                      reverse=True)
     PrimeWords = [word[0] for word in num_words[4]]
     # Prime order: rank index of each length-4 word.
     PrimeOrders = {word: i for i, word in enumerate(PrimeWords)}
     OrderWords = {4: PrimeOrders}
     for i in range(1, nrange - 1):
         if ordertype == 'abs':
             OrderWords[i] = self.ConvertWordToNumOrder(
                 [word[0] for word in num_words[i]], PrimeWords, rawwords)
         else:
             OrderWords[i] = wordConverter.convert_word_order(
                 [word[0] for word in num_words[i]], PrimeWords)
     return self.convert_order_to_raw(OrderWords)
Exemplo n.º 27
0
 def SplitByOrder(self, messages):
     """Split messages at boundaries voted by word-order statistics.

     Order words are loaded from redis when cached; otherwise they are
     derived from the raw words and written back. The per-message votes
     are merged by the Desiner and returned as a boundary order.
     """
     key = ve_strategy().GetWordsKeys('OrderWords')
     if not self.redis_read.is_exist_key(key):
         raw_keys = ve_strategy().GetWordsKeys('RawWords')
         raw_words = self.redis_read.read_from_redis(raw_keys)
         OrderWords = word_convert().ConvertRawWordsToOrder(raw_words, self.parameters['height'] + 1)
         self.redis_read.insert_to_redis(key, OrderWords)
     else:
         OrderWords = self.redis_read.read_from_redis(key)
     orderVoter = OrderVoter(OrderWords)
     PrimBorders = orderVoter.vote_for_messages(messages, self.parameters['height'])
     FinalBorders = Desiner().VoteMultiM(PrimBorders, self.parameters['diff_measure'],
                                         self.parameters['decision_type'],
                                         self.parameters['Threshold_T'], self.parameters['Threshod_R'])
     return Converter().ConvertListToOrder(FinalBorders)
Exemplo n.º 28
0
 def splitMessage(self, boundary, message, maxRange):
     """Render message as hex bytes with '|' inserted at field boundaries.

     :param boundary: sorted byte offsets where fields end
     :param message: raw message bytes
     :param maxRange: maximum number of leading bytes to render
     :return: 'xx yy |zz ...' style string
     """
     hexDatas = Converter.byteListToHex(message).split(' ')
     los = 0
     # Robustness: guard against an empty boundary list (the original
     # indexed boundary[0] unconditionally). A leading 0 boundary is
     # skipped since no separator can precede the first byte.
     if boundary and boundary[los] == 0:
         los = los + 1
     # Cleanup: removed unused startLo and merged the duplicated
     # "append hex byte" branches into one.
     splitMsg = ''
     for i in range(min(len(hexDatas), maxRange)):
         splitMsg = splitMsg + hexDatas[i] + ' '
         if los < len(boundary) and i + 1 == boundary[los]:
             splitMsg = splitMsg + '|'
             los = los + 1
     return splitMsg
Exemplo n.º 29
0
 def splitMessageByType(self, boundary, message):
     """Split message's hex rendering into field substrings at boundaries.

     :param boundary: sorted byte offsets where fields end
     :param message: raw message bytes
     :return: list of 'xx yy ' field strings; trailing bytes past the last
              boundary are accumulated but not appended (as before)
     """
     hexDatas = Converter.byteListToHex(message).split(' ')
     los = 0
     # Robustness: guard against an empty boundary list (the original
     # indexed boundary[0] unconditionally). A leading 0 boundary is
     # skipped since no field can end before the first byte.
     if boundary and boundary[los] == 0:
         los = los + 1
     spltMsgs = []
     splitMsg = ''
     for i in range(len(hexDatas)):
         splitMsg = splitMsg + hexDatas[i] + ' '
         if los < len(boundary) and i + 1 == boundary[los]:
             spltMsgs.append(splitMsg)
             splitMsg = ''
             los = los + 1
     return spltMsgs
Exemplo n.º 30
0
 def split_by_entry(self, messages):
     """Split messages at boundaries voted by entropy statistics.

     Entry (entropy) words are loaded from redis when cached; otherwise
     they are derived from the raw words and written back. The per-message
     votes are merged by the Desiner and returned as a boundary order.
     """
     keys = ve_strategy().GetWordsKeys("EntryWords")
     if not self.redis_read.is_exist_key(keys):
         raw_keys = ve_strategy().GetWordsKeys("RawWords")
         raw_words = self.redis_read.read_from_redis(raw_keys)
         entry_words = word_convert().convert_raw_to_entry(
             raw_words, self.parameters['height'] + 1)
         self.redis_read.insert_to_redis(keys, entry_words)
     else:
         entry_words = self.redis_read.read_from_redis(keys)
     entry_voter = Entry_voter(entry_words)
     PrimBorders = entry_voter.vote_for_messages(messages,
                                                 self.parameters['height'])
     FinalBorders = Desiner().VoteMultiM(PrimBorders,
                                         self.parameters['diff_measure'],
                                         self.parameters['decision_type'],
                                         self.parameters['Threshold_T'],
                                         self.parameters['Threshod_R'])
     return Converter().ConvertListToOrder(FinalBorders)