#!/usr/bin/env python
"""Demo: parse the HDFS 2k sample log with the LenMa parser."""

import sys

# Make the sibling logparser package importable when run from this directory.
sys.path.append('../')

from logparser import LenMa

input_dir = '../logs/HDFS/'   # directory containing the raw log file
output_dir = 'Lenma_result/'  # directory where parsing results are written
log_file = 'HDFS_2k.log'      # name of the log file to parse
log_format = '<Date> <Time> <Pid> <Level> <Component>: <Content>'  # HDFS log format
# Similarity threshold used by LenMa when matching a message to an existing
# template cluster (default: 0.9).
threshold = 0.9
# Optional list of regexes applied as preprocessing before parsing (default: []).
regex = []

parser = LenMa.LogParser(input_dir, output_dir, log_format,
                         threshold=threshold, rex=regex)
parser.parse(log_file)
# NOTE(review): fragment — the `benchmark_settings` dict literal opens before
# this chunk (this is its final dataset entry), and the trailing
# `pd.DataFrame(..., columns=[` call is cut off mid-argument.
'log_format': '<Month> <Date> <Time> <User> <Component>\[<PID>\]( \(<Address>\))?: <Content>',
'regex': [r'([\w-]+\.){2,}[\w-]+'],
'threshold': 0.86
},
}

# NOTE(review): 'bechmark' is a typo for 'benchmark', but it is used
# consistently below, so behavior is unaffected.
bechmark_result = []
for dataset, setting in benchmark_settings.items():
    print('\n=== Evaluation on %s ===' % dataset)
    # Split the configured log_file path into its directory (under input_dir)
    # and its basename.
    indir = os.path.join(input_dir, os.path.dirname(setting['log_file']))
    log_file = os.path.basename(setting['log_file'])
    parser = LenMa.LogParser(log_format=setting['log_format'], indir=indir,
                             outdir=output_dir, rex=setting['regex'],
                             threshold=setting['threshold'])
    parser.parse(log_file)
    # Compare the parser's structured output against the ground-truth CSV;
    # evaluator.evaluate is defined elsewhere — presumably returns
    # (parsing accuracy, exact-match accuracy, edit distance mean, edit
    # distance median); confirm against its definition.
    accuracy_PA, accuracy_exact_string_matching, edit_distance_result, edit_distance_result_median = evaluator.evaluate(
        groundtruth=os.path.join(indir, log_file + '_structured.csv'),
        parsedresult=os.path.join(output_dir, log_file + '_structured.csv'))
    bechmark_result.append([
        dataset, accuracy_PA, accuracy_exact_string_matching,
        edit_distance_result, edit_distance_result_median
    ])

print('\n=== Overall evaluation results ===')
df_result = pd.DataFrame(bechmark_result, columns=[
"""Throughput benchmark: run LenMa over BGL log files of increasing size and
record the wall-clock parse time for each size."""

import collections
import os

import pandas as pd

from benchmark.Lenma_benchmark import benchmark_settings
from logparser import LenMa
from File_Info import get_file_size

dataset = 'BGL'
output_dir = 'LenMa_result/'  # directory where parsing results are written

# Pull this dataset's parser configuration from the shared benchmark settings.
one_setting = benchmark_settings[dataset]
log_file = os.path.basename(one_setting['log_file'])
input_dir = os.path.join('../logs/', os.path.dirname(one_setting['log_file']))

file_size = get_file_size(dataset)  # mapping of size label -> log file name
results = collections.defaultdict(list)

for size, file in file_size.items():
    # Build a fresh parser per file so state from earlier runs cannot leak
    # into later timings.
    parser = LenMa.LogParser(
        input_dir,
        output_dir,
        log_format=one_setting['log_format'],
        threshold=one_setting['threshold'],
        rex=one_setting['regex'],
    )
    # parse() returns an elapsed-time object exposing total_seconds()
    # (presumably a datetime.timedelta).
    elapsed = parser.parse(file)
    results['size'].append(size)
    results['time'].append(elapsed.total_seconds())
    print(results['time'])

pd.DataFrame(results).to_csv('./LenMa_%s.csv' % dataset)