Пример #1
0
def orderTemplate(para):
    """Rewrite every template so that its words follow raw-log word order.

    Templates are tagged "1".."n" by their line number in the templates
    file. For each tag, the raw log line located at the index where that tag
    first appears in the sequences file is used as the reference log: the
    words of the log are walked in order, words that are still available in
    the template are kept, and every other log word is replaced by
    ``variable_symbol``. Templates whose tag never appears in the sequences
    file are written as their de-duplicated words (first occurrence order).

    Args:
        para: dict with the keys
            'rawlog'          -- path of the raw log file, one log per line;
            'templates'       -- path of the template file, one per line;
            'sequences'       -- path of the tag file; line i holds the tag
                                 of raw log line i;
            'order_templates' -- path of the output file (overwritten);
            'variable_symbol' -- token written for log words that are not
                                 part of the template.

    Side effects:
        Writes one line per template to ``order_templates`` and prints the
        output path.
    """
    rawlog = para['rawlog']
    templates = para['templates']
    sequences = para['sequences']
    order_templates = para['order_templates']
    variable_symbol = para['variable_symbol']

    # Line index of the first occurrence of each tag -> tag.
    first_index_tag = {}
    seen_tags = set()
    with open(sequences) as seq_file:
        for index, line in enumerate(seq_file):
            tag = line.strip()
            if tag not in seen_tags:
                seen_tags.add(tag)
                first_index_tag[index] = tag

    # Tag -> raw log line found at the tag's first-occurrence index.
    tag_log = {}
    with open(rawlog) as log_file:
        for index, line in enumerate(log_file):
            tag = first_index_tag.get(index)
            if tag is not None:
                tag_log[tag] = line.strip()

    with open(templates) as template_file:
        template_lines = [line.strip() for line in template_file]

    # The context manager guarantees the output is flushed and closed even
    # if a template fails to process.
    with open(order_templates, 'w') as out_file:
        for number, template in enumerate(template_lines, start=1):
            tag = str(number)
            template_words = template.split()
            if tag in tag_log:
                # Re-order the template along the words of its reference log.
                log_words = getMsgFromNewSyslog(tag_log[tag])[1]
                ordered = []
                for word in log_words:
                    if word in template_words:
                        ordered.append(word)
                        # Consume the word so a repeated log word only
                        # matches as often as the template contains it.
                        template_words.remove(word)
                    else:
                        ordered.append(variable_symbol)
                out = ' '.join(ordered)
            else:
                # No reference log: emit the unique words in a stable
                # (first occurrence) order instead of arbitrary set order.
                out = ' '.join(dict.fromkeys(template_words))
            out_file.write(out + '\n')
    print('template_path', order_templates)
Пример #2
0
    def matchLogsFromFile(self, para):
        '''
        Match every log of a file against the current templates.

        One tag per input line is written to the output sequence file:
        the value returned by self.match (0 when no template matched), or
        -1 when the log has fewer words than the short-log threshold.
        No template is learned here.

        Args:
            para: dict with the keys
                'plot_flag'        -- when equal to 1, self.drawTree() is
                                      called before matching;
                'runtime_log_path' -- path of the log file to match;
                'out_seq_path'     -- path of the tag file (overwritten);
                'short_threshold'  -- minimum number of words of a log;
                'match_model'      -- forwarded to self.match.

        Side effects:
            Writes the tag file and prints summary counters.
        '''
        if para['plot_flag'] == 1:
            self.drawTree()  # draw the ft-tree

        raw_log_path = para['runtime_log_path']
        out_seq_path = para['out_seq_path']
        short_threshold = para['short_threshold']
        match_model = para['match_model']

        short_log = 0
        count_zero = 0
        total_num = 0
        # Both files are managed by the with-statement so the tag file is
        # always flushed and closed, even when matching raises.
        with open(out_seq_path, 'w') as out_file, \
                open(raw_log_path) as log_file:
            for line in log_file:
                total_num += 1
                log_words = ft_tree.getMsgFromNewSyslog(line)[1]

                tag, _ = self.match(log_words, match_model)
                if len(log_words) < short_threshold:
                    # Too short to be meaningful: override whatever matched.
                    short_log += 1
                    tag = -1

                out_file.write(str(tag) + '\n')
                if tag == 0:
                    count_zero += 1

        print('filting # short logs:', short_log, '| threshold =',
              short_threshold)
        print('# of unmatched log (except filting):', count_zero)
        print('# of total logs:', total_num)
        print('seq_file_path:', out_seq_path)
Пример #3
0
    def match(self, log_words, match_model=0):
        '''
        Match one log against the template tree.

        Args:
            log_words: the words of the log as a list, or the raw log as a
                string; a string is first converted with
                ft_tree.getMsgFromNewSyslog(log_words)[1].
            match_model: 4 keeps the words in their given order. Any other
                value keeps only the words found in self.words_frequency,
                de-duplicates them and sorts them by their position in
                that list.

        Returns:
            (tag, cur_match): cur_match is the space-joined words that were
            matched while walking down the tree; tag is
            self.template_tag_dir[cur_match], or 0 when cur_match is not a
            known template.
        '''
        # Accept a raw log string as well (str subclasses included).
        if isinstance(log_words, str):
            log_words = ft_tree.getMsgFromNewSyslog(log_words)[1]

        if match_model == 4:
            # No sorting: use the words exactly as they come.
            words = list(log_words)
        else:
            frequency = self.words_frequency
            rank = {}
            for word in log_words:
                # Skip words already ranked so a repeated word does not
                # trigger another linear scan of the frequency list.
                if word not in rank and word in frequency:
                    rank[word] = frequency.index(word)
            words = sorted(rank, key=rank.get)

        # Walk down from the root; words without a matching child are
        # skipped and the walk continues from the current node.
        cur_node = self.tree.tree_list['']._head
        matched = []
        for word in words:
            child = cur_node.find_child_node(word)
            if child is not None:
                cur_node = child
                matched.append(word)
        cur_match = ' '.join(matched)

        # 0 means "no template matched".
        tag = self.template_tag_dir.get(cur_match, 0)

        return tag, cur_match
Пример #4
0
    def LearnTemplateByIntervals(self, para):
        '''
        Learn templates incrementally from one batch (time interval) of logs.

        Every log of the batch is matched first; each log that matches no
        template (and is not shorter than the threshold) is fed to
        self.tree.auto_temp. Afterwards every path of the tree is treated
        as a template, and the paths not yet present in
        self.template_tag_dir are given the next tag and appended to the
        template file.

        Args:
            para: dict with the keys
                'template_path'   -- template file, opened in append mode;
                'log_path'        -- path of the new logs;
                'short_threshold' -- minimum number of words of a log;
                'match_model'     -- forwarded to self.match;
                'plot_flag'       -- when equal to 1, self.drawTree() is
                                     called at the end.
                The whole dict is also forwarded to self.tree.auto_temp.

        Side effects:
            Mutates self.tree and self.template_tag_dir, appends to the
            template file and prints the tree paths and summary counters.
        '''
        template_path = para['template_path']
        new_logs_path = para['log_path']
        short_threshold = para['short_threshold']
        match_model = para['match_model']

        short_log = 0
        count_zero = 0
        total_num = 0

        with open(new_logs_path) as log_file:
            for line in log_file:
                total_num += 1
                log_words = ft_tree.getMsgFromNewSyslog(line)[1]
                tag, _ = self.match(log_words, match_model)
                if len(log_words) < short_threshold:
                    # Too short: filtered out, never used for learning.
                    short_log += 1
                    tag = -1

                # Unmatched log: grow the tree with it.
                if tag == 0:
                    count_zero += 1
                    self.tree.auto_temp([['', log_words]],
                                        self.words_frequency, para)

        # Traverse the feature tree; every path is one template.
        all_paths = {}
        for pid in self.tree.tree_list:
            traversal = self.tree.traversal_tree(self.tree.tree_list[pid])
            # Longest first: some templates are subsets of others and the
            # larger one has to take precedence.
            all_paths[pid] = sorted(traversal[1], key=len, reverse=True)

        i = 1
        print('new templates:')
        # The context manager guarantees the appended templates are flushed
        # and the handle is closed.
        with open(template_path, 'a') as template_file:
            for pid in all_paths:
                for path in all_paths[pid]:
                    # Same output as printing i, pid and each word followed
                    # by a space, then a newline.
                    print(i, pid, *path, end=' \n')
                    cur_match = ' '.join(path)
                    i += 1
                    if cur_match not in self.template_tag_dir:
                        tag = len(self.template_tag_dir) + 1
                        self.template_tag_dir[cur_match] = tag
                        template_file.write(
                            str(tag) + ' ' + cur_match + '\n')
                        print(cur_match)

        # NOTE: the former second pass over the log file was removed: it
        # discarded every match result and incremented total_num again,
        # which doubled the reported number of logs.
        if para['plot_flag'] == 1:
            self.drawTree()

        print('filting # short logs:', short_log, '| threshold =',
              short_threshold)
        print('# of unmatched log (except filting):', count_zero)
        print('# of total logs:', total_num)
Пример #5
0
    def matchLogsAndLearnTemplateOneByOne(self, para):
        '''
        Match logs one by one and learn a template for each unmatched log.

        Streaming variant of incremental learning: when a log matches no
        template (and is not shorter than the threshold) it is fed to
        self.tree.auto_temp, the log is matched again, the resulting text
        is registered under a new tag and appended to the template file.
        For every log a line "<first token of the log> <tag>" is written to
        the output sequence file, where tag is the matched or newly learned
        tag, or -1 for logs shorter than the threshold.

        Args:
            para: dict with the keys
                'template_path'   -- template file, appended to;
                'log_path'        -- path of the logs to process;
                'out_seq_path'    -- path of the tag file (overwritten);
                'short_threshold' -- minimum number of words of a log;
                'match_model'     -- forwarded to the first self.match call;
                'plot_flag'       -- when equal to 1, self.drawTree() is
                                     called at the end.
                The whole dict is also forwarded to self.tree.auto_temp.

        Raises:
            IndexError: if a line of the log file is blank (its first
                token cannot be read).

        Side effects:
            Mutates self.tree, self.template_tag_dir and
            self.tag_template_dir, writes both files and prints progress
            and summary counters.
        '''
        template_path = para['template_path']
        new_logs_path = para['log_path']
        out_seq_path = para['out_seq_path']
        short_threshold = para['short_threshold']
        match_model = para['match_model']

        short_log = 0
        count_zero = 0
        total_num = 0
        # Both files are managed by the with-statement so the sequence file
        # is always flushed and closed, even when learning raises.
        with open(out_seq_path, 'w') as seq_file, \
                open(new_logs_path) as log_file:
            for line in log_file:
                total_num += 1
                timestamp = line.strip().split()[0]
                log_words = ft_tree.getMsgFromNewSyslog(line)[1]
                tag, cur_match = self.match(log_words, match_model)
                if len(log_words) < short_threshold:
                    # Too short: filtered out, never used for learning.
                    short_log += 1
                    tag = -1

                # Unmatched log: learn a template from it.
                if tag == 0:
                    print('learned a new template:')
                    count_zero += 1
                    print(line)
                    self.tree.auto_temp([['', log_words]],
                                        self.words_frequency, para)
                    new_tag = len(self.template_tag_dir) + 1

                    # Match again after the tree grew to obtain the text of
                    # the new template, and register it in both directions.
                    # NOTE(review): the re-matches below use the default
                    # match_model rather than para['match_model'] - confirm
                    # this is intended.
                    tag, cur_match = self.match(log_words)
                    self.template_tag_dir[cur_match] = new_tag
                    self.tag_template_dir[new_tag] = cur_match
                    # Third match: fetch the tag the log resolves to now.
                    tag, cur_match = self.match(log_words)
                    print(tag, cur_match)
                    # Persist the new template.
                    with open(template_path, 'a') as template_file:
                        template_file.write(
                            str(tag) + ' ' + cur_match + '\n')

                # Matched: 1..n; unmatched: the newly learned tag;
                # shorter than the threshold: -1.
                seq_file.write(timestamp + ' ' + str(tag) + '\n')

        print('filting # short logs:', short_log, '| threshold =',
              short_threshold)
        print('# of unmatched log (except filting):', count_zero)
        print('# of total logs:', total_num)
        print('seq_file_path:', out_seq_path)

        if para['plot_flag'] == 1:
            self.drawTree()