def orderTemplate(para):
    """Rewrite every template so its words follow the order of a sample raw log.

    Keys read from ``para``:
        rawlog: path of the raw log file (one log per line).
        templates: path of the template file; line ``n`` (1-based) is the
            template whose tag is ``str(n)``.
        sequences: path of the tag sequence file; line ``i`` holds the tag of
            raw log line ``i``.
        order_templates: path of the output file (overwritten).
        variable_symbol: token written in place of a log word that is not
            part of the template.

    For each template, the first raw log carrying its tag is used as the
    sample.  The sample's words are walked in order: a word still available
    in the template is emitted (and consumed, so repeated words are matched
    at most as often as they occur in the template); every other word is
    replaced by ``variable_symbol``.  A template without a sample log is
    written as its distinct words in first-occurrence order.

    One line per template is written to ``order_templates``.
    """
    rawlog = para['rawlog']
    templates = para['templates']
    sequences = para['sequences']
    order_templates = para['order_templates']
    variable_symbol = para['variable_symbol']

    # Map the line index of the first occurrence of each distinct tag -> tag.
    first_index_tag = {}
    seen_tags = set()
    with open(sequences) as seq_file:
        for index, line in enumerate(seq_file):
            tag = line.strip()
            if tag not in seen_tags:
                seen_tags.add(tag)
                first_index_tag[index] = tag

    # Pick the raw log found at each of those first-occurrence indices.
    tag_log = {}
    with open(rawlog) as log_file:
        for index, line in enumerate(log_file):
            if index in first_index_tag:
                tag_log[first_index_tag[index]] = line.strip()

    with open(templates) as template_file:
        template_lines = [line.strip() for line in template_file]

    # The context manager guarantees the output is flushed and closed even
    # if parsing a sample log raises.
    with open(order_templates, 'w') as out_file:
        for number, template in enumerate(template_lines, start=1):
            tag = str(number)
            template_words = template.split()
            if tag in tag_log:
                # Element [1] of the parse result is iterated as the log's
                # words (presumably the message word list -- helper is
                # defined elsewhere).
                log_words = getMsgFromNewSyslog(tag_log[tag])[1]
                remaining = list(template_words)
                ordered = []
                for word in log_words:
                    if word in remaining:
                        ordered.append(word)
                        remaining.remove(word)
                    else:
                        ordered.append(variable_symbol)
                out = ' '.join(ordered)
            else:
                # De-duplicate while keeping first-occurrence order so the
                # output is identical from run to run (a plain set() is
                # subject to string hash randomization).
                out = ' '.join(dict.fromkeys(template_words))
            out_file.write(out + '\n')
    print('template_path', order_templates)
def matchLogsFromFile(self, para):
    """Match every log of a file against the tree and write the tag sequence.

    This is the plain matching mode: nothing is learned, and a log that
    matches no template gets tag 0.

    Keys read from ``para``:
        plot_flag: when equal to 1 the tree is drawn first via ``drawTree``.
        runtime_log_path: path of the input log file (one log per line).
        out_seq_path: path of the output file (overwritten); one tag per
            input line.
        short_threshold: logs with fewer words than this are tagged -1.
        match_model: forwarded to ``self.match``.

    Output tags: the value returned by ``self.match`` (0 when unmatched),
    or -1 for logs shorter than ``short_threshold``.  Summary counters are
    printed at the end.
    """
    if para['plot_flag'] == 1:
        self.drawTree()  # draw the ft-tree

    raw_log_path = para['runtime_log_path']
    out_seq_path = para['out_seq_path']
    short_threshold = para['short_threshold']
    match_model = para['match_model']

    short_log = 0
    count_zero = 0
    total_num = 0
    # Both files are managed by the context manager so the sequence file is
    # always flushed and closed, including when a line fails to parse.
    with open(out_seq_path, 'w') as seq_file, open(raw_log_path) as log_file:
        for line in log_file:
            total_num += 1
            log_words = ft_tree.getMsgFromNewSyslog(line)[1]
            tag, _ = self.match(log_words, match_model)
            if len(log_words) < short_threshold:
                # Too short to be meaningful: override whatever was matched.
                short_log += 1
                tag = -1
            seq_file.write(str(tag) + '\n')
            if tag == 0:
                count_zero += 1

    print('filting # short logs:', short_log, '| threshold =', short_threshold)
    print('# of unmatched log (except filting):', count_zero)
    print('# of total logs:', total_num)
    print('seq_file_path:', out_seq_path)
def match(self, log_words, match_model=0):
    """Match one log against the tree and return ``(tag, cur_match)``.

    Args:
        log_words: either the list of words of a log, or the raw log string
            (which is then parsed with ``ft_tree.getMsgFromNewSyslog`` and
            element ``[1]`` of the result is used).
        match_model: when 4, the words are used in their given order;
            for any other value the words are de-duplicated, restricted to
            those present in ``self.words_frequency`` and sorted by their
            position in that list.

    Returns:
        A tuple ``(tag, cur_match)`` where ``cur_match`` is the
        space-joined sequence of words that could be followed down the tree
        rooted at ``self.tree.tree_list['']._head`` and ``tag`` is
        ``self.template_tag_dir[cur_match]``, or 0 when that string is not a
        known template.
    """
    # Be lenient about the input: a raw log string is parsed first.
    if isinstance(log_words, str):
        log_words = ft_tree.getMsgFromNewSyslog(log_words)[1]

    if match_model == 4:
        words = list(log_words)
    else:
        frequency = self.words_frequency
        rank = {}
        for word in log_words:
            # Words absent from the frequency list are ignored; repeated
            # words are only looked up once.
            if word not in rank and word in frequency:
                rank[word] = frequency.index(word)
        words = sorted(rank, key=rank.get)

    matched = []
    node = self.tree.tree_list['']._head
    for word in words:
        # A word that is not a child of the current node is skipped; the
        # walk continues from the same node with the next word.
        child = node.find_child_node(word)
        if child is not None:
            node = child
            matched.append(word)

    cur_match = ' '.join(matched)
    # 0 marks "no template matched".
    tag = self.template_tag_dir.get(cur_match, 0)
    return tag, cur_match
def LearnTemplateByIntervals(self, para):
    """Learn templates incrementally from one batch (interval) of logs.

    Every log of the batch that matches no known template (and is not
    shorter than ``short_threshold``) is fed to ``self.tree.auto_temp``.
    After the whole batch has been processed the tree is traversed once and
    every path that is not yet in ``self.template_tag_dir`` is registered
    with the next tag number and appended to the template file.

    Keys read from ``para``:
        template_path: template file, opened in append mode.
        log_path: path of the batch of new logs (one log per line).
        short_threshold: logs with fewer words than this are not learned.
        match_model: forwarded to ``self.match`` in the learning pass.
        plot_flag: when equal to 1 the tree is drawn at the end.
    ``para`` itself is also passed through to ``self.tree.auto_temp``.

    Summary counters are printed at the end.
    """
    template_path = para['template_path']
    new_logs_path = para['log_path']
    short_threshold = para['short_threshold']
    match_model = para['match_model']

    short_log = 0
    count_zero = 0
    total_num = 0

    # The template file is opened before the logs are read (as before) and
    # is now reliably closed by the context manager.
    with open(template_path, 'a') as template_file:
        # Pass 1: grow the tree with every unmatched, long-enough log.
        with open(new_logs_path) as log_file:
            for line in log_file:
                total_num += 1
                log_words = ft_tree.getMsgFromNewSyslog(line)[1]
                tag, _ = self.match(log_words, match_model)
                if len(log_words) < short_threshold:
                    # Short logs are filtered out and never learned.
                    short_log += 1
                    tag = -1
                if tag == 0:
                    # No template matched: learn from this single log.
                    count_zero += 1
                    self.tree.auto_temp([['', log_words]],
                                        self.words_frequency, para)

        # Walk the feature tree; every path is one template.
        all_paths = {}
        for pid in self.tree.tree_list:
            paths = list(self.tree.traversal_tree(self.tree.tree_list[pid])[1])
            # Longest first: some templates are subsets of others and the
            # larger set must take precedence.
            paths.sort(key=len, reverse=True)
            all_paths[pid] = paths

        # Register and persist the templates that are not known yet.
        i = 1
        print('new templates:')
        for pid in all_paths:
            for path in all_paths[pid]:
                cur_match = ' '.join(path)
                # Same console output as printing index, pid and each word
                # followed by a space, then a newline.
                print(i, pid, *path, end=' \n')
                i += 1
                if cur_match not in self.template_tag_dir:
                    tag = len(self.template_tag_dir) + 1
                    self.template_tag_dir[cur_match] = tag
                    template_file.write(str(tag) + ' ' + cur_match + '\n')
                    print(cur_match)

    # Pass 2: re-match the batch against the updated tree.
    # NOTE(review): the results of this pass are discarded; it is kept only
    # to preserve the existing call sequence and may be removable.  It must
    # not be counted, otherwise '# of total logs' is reported twice as large
    # as the real number of logs.
    with open(new_logs_path) as log_file:
        for line in log_file:
            log_words = ft_tree.getMsgFromNewSyslog(line)[1]
            self.match(log_words)

    if para['plot_flag'] == 1:
        self.drawTree()
    print('filting # short logs:', short_log, '| threshold =', short_threshold)
    print('# of unmatched log (except filting):', count_zero)
    print('# of total logs:', total_num)
def matchLogsAndLearnTemplateOneByOne(self, para):
    """Match a stream of logs, learning a new template for each miss.

    Logs are processed one at a time.  When a log matches no known template
    (and is not shorter than ``short_threshold``) it is fed to
    ``self.tree.auto_temp``, matched again, and the resulting match string
    is registered as a new template whose tag is written for that log.

    Keys read from ``para``:
        template_path: template file; each newly learned template is
            appended as ``"<tag> <template>"``.
        log_path: path of the input logs (one log per line).
        out_seq_path: path of the output file (overwritten); one
            ``"<first token of the line> <tag>"`` line per input log.
        short_threshold: logs with fewer words than this are tagged -1.
        match_model: forwarded to ``self.match``.
        plot_flag: when equal to 1 the tree is drawn at the end.
    ``para`` itself is also passed through to ``self.tree.auto_temp``.

    Output tags: the matched tag, the tag of the newly learned template, or
    -1 for short logs.  Summary counters are printed at the end.
    """
    template_path = para['template_path']
    new_logs_path = para['log_path']
    out_seq_path = para['out_seq_path']
    short_threshold = para['short_threshold']
    match_model = para['match_model']

    short_log = 0
    count_zero = 0
    total_num = 0
    # The context manager closes (and flushes) the sequence file even when
    # a line fails to parse half-way through the stream.
    with open(out_seq_path, 'w') as seq_file, open(new_logs_path) as log_file:
        for line in log_file:
            total_num += 1
            # First whitespace-separated token of the line; written back as
            # the timestamp column of the output.
            timestamp = line.strip().split()[0]
            log_words = ft_tree.getMsgFromNewSyslog(line)[1]
            tag, cur_match = self.match(log_words, match_model)
            if len(log_words) < short_threshold:
                # Short logs are filtered out and never learned.
                short_log += 1
                tag = -1
            if tag == 0:
                # No template matched: learn incrementally from this log.
                print('learned a new template:')
                count_zero += 1
                print(line)
                self.tree.auto_temp([['', log_words]],
                                    self.words_frequency, para)
                new_tag = len(self.template_tag_dir) + 1
                # Re-match against the updated tree to obtain the text of
                # the new template.  The same match_model as the initial
                # match is used so the registered key is one that later
                # initial matches can actually hit; otherwise (model 4) the
                # same log would be re-learned on every occurrence.
                _, cur_match = self.match(log_words, match_model)
                self.template_tag_dir[cur_match] = new_tag
                self.tag_template_dir[new_tag] = cur_match
                # Matching again would now resolve cur_match to new_tag.
                tag = new_tag
                print(tag, cur_match)
                # Persist the new template right away.
                with open(template_path, 'a') as template_file:
                    template_file.write(str(tag) + ' ' + cur_match + '\n')
            seq_file.write(timestamp + ' ' + str(tag) + '\n')

    print('filting # short logs:', short_log, '| threshold =', short_threshold)
    print('# of unmatched log (except filting):', count_zero)
    print('# of total logs:', total_num)
    print('seq_file_path:', out_seq_path)
    if para['plot_flag'] == 1:
        self.drawTree()